diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..2fcec4c5f --- /dev/null +++ b/.clang-format @@ -0,0 +1,72 @@ +--- +Language: Cpp +BasedOnStyle: Microsoft + +# Indentation +IndentWidth: 2 # 2 spaces per indent +AccessModifierOffset: -2 +IndentAccessModifiers: false # Align access modifiers to braces +NamespaceIndentation: All # Indent namespace contents +ConstructorInitializerIndentWidth: 2 + +# Comments which match this regex will be unformatted (and therefore can be longer or have more whitespace than other comments) +CommentPragmas: '^\*\*' + + +# Alignment +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + AlignCompound: true + PadOperators: true +AlignConsecutiveBitFields: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true +AlignConsecutiveDeclarations: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + # For future versions of clang-format + # AlignFunctionDeclarations: false + # AlignFunctionPointers: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + +# Newlines +ColumnLimit: 0 +BreakBeforeBraces: Allman # Braces on their own lines +SeparateDefinitionBlocks: Always # Separate definitions (functions etc.) 
with an empty line +AlwaysBreakTemplateDeclarations: true # Put template on their own lines +AllowShortBlocksOnASingleLine: Never +# On a newer version of clang-format, replace with BinPackArguments: OnePerLine +BinPackArguments: false # Don't allow multiple function arguments on the same line unless they all fit +BinPackParameters: false # Same but for parameters +PackConstructorInitializers: NextLine +AllowShortFunctionsOnASingleLine: None +BreakBeforeBinaryOperators: NonAssignment # Put binary operators after a line break, rather than before +AllowShortIfStatementsOnASingleLine: Never + +# Spaces +SpaceBeforeParens: ControlStatementsExceptControlMacros +SpaceAfterCStyleCast: true +PointerAlignment: Left + +# Includes +IncludeBlocks: Regroup # Regroup includes based on config +IncludeCategories: + - Regex: '(^"|\.hpp)' # 'local' includes + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '\/' # Library includes + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' # Everything else + Priority: 1 + SortPriority: 0 + CaseSensitive: false diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 69003f8ee..21cdef9b6 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -30,4 +30,4 @@ }, "workspaceFolder": "/home/app/", "workspaceMount": "source=${localWorkspaceFolder},target=/home/app/,type=bind,consistency=cached" -} \ No newline at end of file +} diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index a49eab2f6..0086358db 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1 +1 @@ -blank_issues_enabled: true \ No newline at end of file +blank_issues_enabled: true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index e26211e22..f77e9eb5c 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -36,4 +36,3 @@ _If this is a relatively large or 
complex change, kick off the discussion by explaining why you chose the solution you did and what alternatives you considered, etc._ - diff --git a/.github/workflows/.clang-format b/.github/workflows/.clang-format deleted file mode 100644 index b8cef3e95..000000000 --- a/.github/workflows/.clang-format +++ /dev/null @@ -1,9 +0,0 @@ -# Documentation about the style options and their meaning -# https://releases.llvm.org/12.0.0/tools/clang/docs/ClangFormatStyleOptions.html - -Language: Cpp -AccessModifierOffset: 0 -AlignConsecutiveMacros: true -BreakBeforeBraces: Linux -ColumnLimit: 150 -ConstructorInitializerIndentWidth: 2 diff --git a/.github/workflows/pre_commit.yaml b/.github/workflows/pre_commit.yaml new file mode 100644 index 000000000..c0c04ce92 --- /dev/null +++ b/.github/workflows/pre_commit.yaml @@ -0,0 +1,22 @@ +name: resolve-bot pre-commit + +# Won't run on develop/main directly +on: [pull_request] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.ref }} + - uses: actions/setup-python@v5.4.0 + - uses: pre-commit/action@v3.0.1 + - uses: EndBug/add-and-commit@v9.1.4 + # Only need to try and commit if the action failed + if: failure() + with: + fetch: false + committer_name: GitHub Actions + committer_email: actions@github.com + message: Apply pre-commit fixes diff --git a/.gitlab/pnnl/incline.gitlab-ci.yml b/.gitlab/pnnl/incline.gitlab-ci.yml index 67614de8f..54521755b 100644 --- a/.gitlab/pnnl/incline.gitlab-ci.yml +++ b/.gitlab/pnnl/incline.gitlab-ci.yml @@ -45,4 +45,4 @@ failure: - .report-status rules: - when: on_failure - \ No newline at end of file + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..9e7c4c3a4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.7 + hooks: + - id: clang-format + types_or: [c++, c, cuda] + - 
repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-toml + - id: forbid-new-submodules + - id: end-of-file-fixer + - id: check-yaml diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d4da16fa..72efe8b92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ #[[ -@brief ReSolve library root +@brief Re::Solve library root @author Slaven Peles diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b836299b1..6449b0a1c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -185,5 +185,3 @@ class Matrix // No, class is outside ReSolve namespace { // matrix code }; - - diff --git a/README.md b/README.md index 422bc1bad..7657c69af 100644 --- a/README.md +++ b/README.md @@ -114,4 +114,3 @@ contributions to ReSolve must be made under the smae licensing terms. **Please Note** If you are using ReSolve with any third party libraries linked in (e.g., KLU), be sure to review the respective license of the package as that license may have more restrictive terms than the ReSolve license. - diff --git a/buildsystem/ascent-env.sh b/buildsystem/ascent-env.sh index ded2f1d74..8984a5b52 100644 --- a/buildsystem/ascent-env.sh +++ b/buildsystem/ascent-env.sh @@ -2,4 +2,3 @@ module load gcc/10.2 # Load spack deps source ./buildsystem/spack/ascent/modules/dependencies.sh - diff --git a/buildsystem/incline-env.sh b/buildsystem/incline-env.sh index 348139ff9..3b4b9cbb2 100644 --- a/buildsystem/incline-env.sh +++ b/buildsystem/incline-env.sh @@ -12,4 +12,3 @@ unset GPU_DEVICE_ORDINAL # Load spack generated modules source ./buildsystem/spack/incline/modules/dependencies.sh - diff --git a/buildsystem/init-mirror.sh b/buildsystem/init-mirror.sh index 3a175d02a..cf8d563d9 100755 --- a/buildsystem/init-mirror.sh +++ b/buildsystem/init-mirror.sh @@ -15,4 +15,3 @@ res=$? 
chmod -R ugo+wrx $SPACK_MIRROR & exit $res - diff --git a/buildsystem/spack/ascent/env.sh b/buildsystem/spack/ascent/env.sh index 6b6d97c5e..42c0851b8 100644 --- a/buildsystem/spack/ascent/env.sh +++ b/buildsystem/spack/ascent/env.sh @@ -21,4 +21,3 @@ export SPACK_PYTHON=$(which python) export tempdir=$SPACK_CACHE export TMP=$SPACK_CACHE export TMPDIR=$SPACK_CACHE - diff --git a/buildsystem/spack/ascent/spack.yaml b/buildsystem/spack/ascent/spack.yaml index 912e0e968..38ffabcf3 100644 --- a/buildsystem/spack/ascent/spack.yaml +++ b/buildsystem/spack/ascent/spack.yaml @@ -65,4 +65,3 @@ spack: modules: [gcc/10.2] operating_system: rhel8 target: ppc64le - diff --git a/buildsystem/spack/deception/spack.yaml b/buildsystem/spack/deception/spack.yaml index bd885b575..cb675f399 100644 --- a/buildsystem/spack/deception/spack.yaml +++ b/buildsystem/spack/deception/spack.yaml @@ -57,4 +57,4 @@ spack: fc: /share/apps/gcc/9.1.0/bin/gfortran operating_system: centos7 target: x86_64 - modules: [gcc/9.1.0] \ No newline at end of file + modules: [gcc/9.1.0] diff --git a/buildsystem/spack/incline/env.sh b/buildsystem/spack/incline/env.sh index 035ef6cc3..1447691d8 100644 --- a/buildsystem/spack/incline/env.sh +++ b/buildsystem/spack/incline/env.sh @@ -24,4 +24,3 @@ export SPACK_PYTHON=$(which python3) export tempdir=$SPACK_CACHE export TMP=$SPACK_CACHE export TMPDIR=$SPACK_CACHE - diff --git a/cmake/ReSolveFindHipLibraries.cmake b/cmake/ReSolveFindHipLibraries.cmake index 059aac97e..5ec861d04 100644 --- a/cmake/ReSolveFindHipLibraries.cmake +++ b/cmake/ReSolveFindHipLibraries.cmake @@ -34,4 +34,3 @@ target_include_directories(resolve_hip INTERFACE $) install(TARGETS resolve_hip EXPORT ReSolveTargets) - diff --git a/docs/doxygen/doxygen-awesome-interactive-toc.js b/docs/doxygen/doxygen-awesome-interactive-toc.js index 20a9669d7..7201c687b 100644 --- a/docs/doxygen/doxygen-awesome-interactive-toc.js +++ b/docs/doxygen/doxygen-awesome-interactive-toc.js @@ -78,4 +78,4 @@ class 
DoxygenAwesomeInteractiveToc { active?.classList.add("active") active?.classList.remove("aboveActive") } -} \ No newline at end of file +} diff --git a/docs/doxygen/doxygen-awesome-tabs.js b/docs/doxygen/doxygen-awesome-tabs.js index 8e725b230..b6f99e241 100644 --- a/docs/doxygen/doxygen-awesome-tabs.js +++ b/docs/doxygen/doxygen-awesome-tabs.js @@ -67,4 +67,4 @@ class DoxygenAwesomeTabs { }) }) } -} \ No newline at end of file +} diff --git a/docs/doxygen/index.rst b/docs/doxygen/index.rst index 833e50b1d..e9bdb7b95 100644 --- a/docs/doxygen/index.rst +++ b/docs/doxygen/index.rst @@ -4,4 +4,3 @@ API Documentation Doxygen generated API documentation can be found here: `Doxygen Docs `_ - diff --git a/docs/requirements.txt b/docs/requirements.txt index 52330aba6..e4a9e26ac 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,4 @@ docutils sphinx==6.2.1 sphinx-rtd-theme==1.2.2 breathe -m2r2 \ No newline at end of file +m2r2 diff --git a/docs/sphinx/license.rst b/docs/sphinx/license.rst index ba8dc9d62..73ae42ae2 100644 --- a/docs/sphinx/license.rst +++ b/docs/sphinx/license.rst @@ -2,4 +2,4 @@ License ******* -.. include:: ../../LICENSE \ No newline at end of file +.. 
include:: ../../LICENSE diff --git a/docs/sphinx/style/theme_overrides.css b/docs/sphinx/style/theme_overrides.css index c9feb6f0a..d68b9dfcc 100644 --- a/docs/sphinx/style/theme_overrides.css +++ b/docs/sphinx/style/theme_overrides.css @@ -19,4 +19,4 @@ /* Sidebar header (and topbar for mobile) */ .wy-side-nav-search, .wy-nav-top { background: #343131; - } \ No newline at end of file + } diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index 19269a372..ec812c263 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -1,9 +1,14 @@ #pragma once -#include +#include #include +#include + #include -#include +#include +#include +#include +#include namespace ReSolve { @@ -23,10 +28,10 @@ namespace ReSolve std::cout << "========================================================================================================================\n"; std::cout << std::endl; - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; } /** @@ -41,345 +46,357 @@ namespace ReSolve template class ExampleHelper { - public: - /** - * @brief Default constructor - * - * Initializes matrix and vector handlers. - * - * @param[in,out] workspace - workspace for matrix and vector handlers - * - * @pre Workspace handles are initialized - * - * @post Handlers are instantiated. - * allocated - */ - ExampleHelper(workspace_type& workspace) - : mh_(&workspace), - vh_(&workspace) + public: + /** + * @brief Default constructor + * + * Initializes matrix and vector handlers. 
+ * + * @param[in,out] workspace - workspace for matrix and vector handlers + * + * @pre Workspace handles are initialized + * + * @post Handlers are instantiated. + * allocated + */ + ExampleHelper(workspace_type& workspace) + : mh_(&workspace), + vh_(&workspace) + { + memspace_ = ReSolve::memory::DEVICE; + if (mh_.getIsCudaEnabled()) { - memspace_ = ReSolve::memory::DEVICE; - if (mh_.getIsCudaEnabled()) { - hardware_backend_ = "CUDA"; - } else if (mh_.getIsHipEnabled()) { - hardware_backend_ = "HIP"; - } else { - hardware_backend_ = "CPU"; - memspace_ = ReSolve::memory::HOST; - } + hardware_backend_ = "CUDA"; } - - /** - * @brief Destroy the ExampleHelper object - * - * @post Vectors res_ and x_true_ are deleted. - * - */ - ~ExampleHelper() + else if (mh_.getIsHipEnabled()) { - if (res_) { - delete res_; - res_ = nullptr; - } - if (x_true_) { - delete x_true_; - x_true_ = nullptr; - } + hardware_backend_ = "HIP"; } - - std::string getHardwareBackend() const + else { - return hardware_backend_; + hardware_backend_ = "CPU"; + memspace_ = ReSolve::memory::HOST; } - - /** - * @brief Set the new linear system together with its computed solution - * and compute solution error and residual norms. - * - * This will set the new system A*x = r and compute related error norms. - * - * @param A[in] - Linear system matrix - * @param r[in] - Linear system right-hand side - * @param x[in] - Computed solution of the linear system - */ - void setSystem(ReSolve::matrix::Sparse* A, - ReSolve::vector::Vector* r, - ReSolve::vector::Vector* x) + } + + /** + * @brief Destroy the ExampleHelper object + * + * @post Vectors res_ and x_true_ are deleted. 
+ * + */ + ~ExampleHelper() + { + if (res_) { - assert((res_ == nullptr) && (x_true_ == nullptr)); - A_ = A; - r_ = r; - x_ = x; - res_ = new ReSolve::vector::Vector(A->getNumRows()); - computeNorms(); + delete res_; + res_ = nullptr; } - - /** - * @brief Set the new linear system together with its computed solution - * and compute solution error and residual norms. - * - * This is to be used after values in A and r are updated. - * - * @todo This method probably does not need any input parameters. - * - * @param A[in] - Linear system matrix - * @param r[in] - Linear system right-hand side - * @param x[in] - Computed solution of the linear system - */ - void resetSystem(ReSolve::matrix::Sparse* A, - ReSolve::vector::Vector* r, - ReSolve::vector::Vector* x) + if (x_true_) { - A_ = A; - r_ = r; - x_ = x; - if(res_ == nullptr) { - res_ = new ReSolve::vector::Vector(A->getNumRows()); - } - - computeNorms(); + delete x_true_; + x_true_ = nullptr; } - - /// Return L2 norm of the linear system residual. - ReSolve::real_type getNormResidual() + } + + std::string getHardwareBackend() const + { + return hardware_backend_; + } + + /** + * @brief Set the new linear system together with its computed solution + * and compute solution error and residual norms. + * + * This will set the new system A*x = r and compute related error norms. + * + * @param A[in] - Linear system matrix + * @param r[in] - Linear system right-hand side + * @param x[in] - Computed solution of the linear system + */ + void setSystem(ReSolve::matrix::Sparse* A, + ReSolve::vector::Vector* r, + ReSolve::vector::Vector* x) + { + assert((res_ == nullptr) && (x_true_ == nullptr)); + A_ = A; + r_ = r; + x_ = x; + res_ = new ReSolve::vector::Vector(A->getNumRows()); + computeNorms(); + } + + /** + * @brief Set the new linear system together with its computed solution + * and compute solution error and residual norms. + * + * This is to be used after values in A and r are updated. 
+ * + * @todo This method probably does not need any input parameters. + * + * @param A[in] - Linear system matrix + * @param r[in] - Linear system right-hand side + * @param x[in] - Computed solution of the linear system + */ + void resetSystem(ReSolve::matrix::Sparse* A, + ReSolve::vector::Vector* r, + ReSolve::vector::Vector* x) + { + A_ = A; + r_ = r; + x_ = x; + if (res_ == nullptr) { - return norm_res_; + res_ = new ReSolve::vector::Vector(A->getNumRows()); } - /// Return relative residual norm. - ReSolve::real_type getNormRelativeResidual() + computeNorms(); + } + + /// Return L2 norm of the linear system residual. + ReSolve::real_type getNormResidual() + { + return norm_res_; + } + + /// Return relative residual norm. + ReSolve::real_type getNormRelativeResidual() + { + return norm_res_ / norm_rhs_; + } + + /// Minimalistic summary + void printShortSummary() + { + std::cout << "\t2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << getNormRelativeResidual() << "\n"; + } + + /// Summary of direct solve + void printSummary() + { + std::cout << "\t 2-Norm of the residual (before IR): " + << std::scientific << std::setprecision(16) + << getNormRelativeResidual() << "\n"; + + std::cout << std::scientific << std::setprecision(16) + << "\t Matrix inf norm: " << inf_norm_A_ << "\n" + << "\t Residual inf norm: " << inf_norm_res_ << "\n" + << "\t Solution inf norm: " << inf_norm_x_ << "\n" + << "\t Norm of scaled residuals: " << nsr_norm_ << "\n"; + } + + /// Summary of error norms for an iterative refinement test. + void printIrSummary(ReSolve::LinSolverIterative* ls) + { + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << ls->getInitResidualNorm() / norm_rhs_ + << " final nrm: " + << ls->getFinalResidualNorm() / norm_rhs_ + << " iter: " << ls->getNumIter() << "\n"; + } + + /// Summary of error norms for an iterative solver test. 
+ void printIterativeSolverSummary(ReSolve::LinSolverIterative* ls) + { + std::cout << std::setprecision(16) << std::scientific; + std::cout << "\t Initial residual norm ||b-A*x|| : " << ls->getInitResidualNorm() << "\n"; + std::cout << "\t Initial relative residual norm ||b-A*x||/||b|| : " << ls->getInitResidualNorm() / norm_rhs_ << "\n"; + std::cout << "\t Final residual norm ||b-A*x|| : " << ls->getFinalResidualNorm() << "\n"; + std::cout << "\t Final relative residual norm ||b-A*x||/||b|| : " << ls->getFinalResidualNorm() / norm_rhs_ << "\n"; + std::cout << "\t Number of iterations : " << ls->getNumIter() << "\n"; + } + + /// Check the relative residual norm against `tolerance`. + int checkResult(ReSolve::real_type tolerance) + { + int error_sum = 0; + ReSolve::real_type norm = norm_res_ / norm_rhs_; + + if (!std::isfinite(norm)) { - return norm_res_/norm_rhs_; + std::cout << "Result is not a finite number!\n"; + error_sum++; } - - /// Minimalistic summary - void printShortSummary() + if (norm > tolerance) { - std::cout << "\t2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << getNormRelativeResidual() << "\n"; + std::cout << "Result inaccurate!\n"; + error_sum++; } - /// Summary of direct solve - void printSummary() + return error_sum; + } + + /** + * @brief Verify the computation of the norm of scaled residuals. + * + * The norm value is provided as the input. This function computes + * the norm of scaled residuals for the system that has been set + * by the constructor or (re)setSystem functions. 
+ * + * @param nsr_system - norm of scaled residuals value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkNormOfScaledResiduals(ReSolve::real_type nsr_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm to get updated vector res_ + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + // Compute norm of scaled residuals + real_type inf_norm_A = 0.0; + mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); + real_type inf_norm_x = vh_.infNorm(x_, memspace_); + real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); + real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; + + // Test norm of scaled residuals method in SystemSolver + if (error > 10.0 * std::numeric_limits::epsilon()) { - std::cout << "\t 2-Norm of the residual (before IR): " - << std::scientific << std::setprecision(16) - << getNormRelativeResidual() << "\n"; - + std::cout << "Norm of scaled residuals computation failed:\n"; std::cout << std::scientific << std::setprecision(16) - << "\t Matrix inf norm: " << inf_norm_A_ << "\n" - << "\t Residual inf norm: " << inf_norm_res_ << "\n" - << "\t Solution inf norm: " << inf_norm_x_ << "\n" - << "\t Norm of scaled residuals: " << nsr_norm_ << "\n"; - } - - /// Summary of error norms for an iterative refinement test. - void printIrSummary(ReSolve::LinSolverIterative* ls) - { - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << ls->getInitResidualNorm()/norm_rhs_ - << " final nrm: " - << ls->getFinalResidualNorm()/norm_rhs_ - << " iter: " << ls->getNumIter() << "\n"; - } - - /// Summary of error norms for an iterative solver test. 
- void printIterativeSolverSummary(ReSolve::LinSolverIterative* ls) - { - std::cout << std::setprecision(16) << std::scientific; - std::cout << "\t Initial residual norm ||b-A*x|| : " << ls->getInitResidualNorm() << "\n"; - std::cout << "\t Initial relative residual norm ||b-A*x||/||b|| : " << ls->getInitResidualNorm()/norm_rhs_ << "\n"; - std::cout << "\t Final residual norm ||b-A*x|| : " << ls->getFinalResidualNorm() << "\n"; - std::cout << "\t Final relative residual norm ||b-A*x||/||b|| : " << ls->getFinalResidualNorm()/norm_rhs_ << "\n"; - std::cout << "\t Number of iterations : " << ls->getNumIter() << "\n"; + << "\tMatrix inf norm : " << inf_norm_A << "\n" + << "\tResidual inf norm : " << inf_norm_res << "\n" + << "\tSolution inf norm : " << inf_norm_x << "\n" + << "\tNorm of scaled residuals : " << nsr_norm << "\n" + << "\tNorm of scaled residuals (system): " << nsr_system << "\n\n"; } - - /// Check the relative residual norm against `tolerance`. - int checkResult(ReSolve::real_type tolerance) + return error_sum; + } + + /** + * @brief Verify the computation of the relative residual norm. + * + * The norm value is provided as the input. This function computes + * the relative residual norm for the system that has been set + * by the constructor or (re)setSystem functions. 
+ * + * @param rrn_system - relative residual norm value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkRelativeResidualNorm(ReSolve::real_type rrn_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + real_type error = std::abs(norm_rhs_ * rrn_system - norm_res_) / norm_res_; + if (error > 10.0 * std::numeric_limits::epsilon()) { - int error_sum = 0; - ReSolve::real_type norm = norm_res_/norm_rhs_; - - if (!std::isfinite(norm)) { - std::cout << "Result is not a finite number!\n"; - error_sum++; - } - if (norm > tolerance) { - std::cout << "Result inaccurate!\n"; - error_sum++; - } - - return error_sum; - } - - /** - * @brief Verify the computation of the norm of scaled residuals. - * - * The norm value is provided as the input. This function computes - * the norm of scaled residuals for the system that has been set - * by the constructor or (re)setSystem functions. 
- * - * @param nsr_system - norm of scaled residuals value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkNormOfScaledResiduals(ReSolve::real_type nsr_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm to get updated vector res_ - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - // Compute norm of scaled residuals - real_type inf_norm_A = 0.0; - mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); - real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); - real_type error = std::abs(nsr_system - nsr_norm)/nsr_norm; - - // Test norm of scaled residuals method in SystemSolver - if (error > 10.0*std::numeric_limits::epsilon()) - { - std::cout << "Norm of scaled residuals computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tMatrix inf norm : " << inf_norm_A << "\n" - << "\tResidual inf norm : " << inf_norm_res << "\n" - << "\tSolution inf norm : " << inf_norm_x << "\n" - << "\tNorm of scaled residuals : " << nsr_norm << "\n" - << "\tNorm of scaled residuals (system): " << nsr_system << "\n\n"; - } - return error_sum; - } - - /** - * @brief Verify the computation of the relative residual norm. - * - * The norm value is provided as the input. This function computes - * the relative residual norm for the system that has been set - * by the constructor or (re)setSystem functions. 
- * - * @param rrn_system - relative residual norm value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkRelativeResidualNorm(ReSolve::real_type rrn_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - real_type error = std::abs(norm_rhs_ * rrn_system - norm_res_)/norm_res_; - if (error > 10.0*std::numeric_limits::epsilon()) { - std::cout << "Relative residual norm computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tTest value : " << norm_res_/norm_rhs_ << "\n" - << "\tSystemSolver computed : " << rrn_system << "\n\n"; - error_sum++; - } - return error_sum; - } - - /** - * @brief Verify the computation of the residual norm. - * - * The norm value is provided as the input. This function computes - * the residual norm for the system that has been set by the constructor - * or (re)setSystem functions. - * - * @param rrn_system - residual norm value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkResidualNorm(ReSolve::real_type rn_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - real_type error = std::abs(rn_system - norm_res_)/norm_res_; - if (error > 10.0*std::numeric_limits::epsilon()) { - std::cout << "Residual norm computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tTest value : " << norm_res_ << "\n" - << "\tSystemSolver computed : " << rn_system << "\n\n"; - error_sum++; - } - return error_sum; - } - - private: - /// Compute error norms. 
- void computeNorms() - { - // Compute rhs and residual norms - res_->copyDataFrom(r_, memspace_, memspace_); - norm_rhs_ = norm2(*r_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - // Compute norm of scaled residuals - mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); - inf_norm_x_ = vh_.infNorm(x_, memspace_); - inf_norm_res_ = vh_.infNorm(res_, memspace_); - nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); - } - - /** - * @brief Computes residual norm = || A * x - r ||_2 - * - * @param[in] A - system matrix - * @param[in] x - computed solution of the system - * @param[in,out] r - system right-hand side, residual vector - * @param[in] memspace memory space where to computate the norm - * @return ReSolve::real_type - * - * @post r is overwritten with residual values - */ - ReSolve::real_type computeResidualNorm(ReSolve::matrix::Sparse& A, - ReSolve::vector::Vector& x, - ReSolve::vector::Vector& r, - ReSolve::memory::MemorySpace memspace) - { - using namespace ReSolve::constants; - mh_.matvec(&A, &x, &r, &ONE, &MINUS_ONE, memspace); // r := A * x - r - return norm2(r, memspace); + std::cout << "Relative residual norm computation failed:\n"; + std::cout << std::scientific << std::setprecision(16) + << "\tTest value : " << norm_res_ / norm_rhs_ << "\n" + << "\tSystemSolver computed : " << rrn_system << "\n\n"; + error_sum++; } - - /// Compute L2 norm of vector `r` in memory space `memspace`. - ReSolve::real_type norm2(ReSolve::vector::Vector& r, - ReSolve::memory::MemorySpace memspace) + return error_sum; + } + + /** + * @brief Verify the computation of the residual norm. + * + * The norm value is provided as the input. This function computes + * the residual norm for the system that has been set by the constructor + * or (re)setSystem functions. 
+ * + * @param rrn_system - residual norm value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkResidualNorm(ReSolve::real_type rn_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + real_type error = std::abs(rn_system - norm_res_) / norm_res_; + if (error > 10.0 * std::numeric_limits::epsilon()) { - return std::sqrt(vh_.dot(&r, &r, memspace)); + std::cout << "Residual norm computation failed:\n"; + std::cout << std::scientific << std::setprecision(16) + << "\tTest value : " << norm_res_ << "\n" + << "\tSystemSolver computed : " << rn_system << "\n\n"; + error_sum++; } - - private: - ReSolve::matrix::Sparse* A_; ///< pointer to system matrix - ReSolve::vector::Vector* r_; ///< pointer to system right-hand side - ReSolve::vector::Vector* x_; ///< pointer to the computed solution - - ReSolve::MatrixHandler mh_; ///< matrix handler instance - ReSolve::VectorHandler vh_; ///< vector handler instance - - ReSolve::vector::Vector* res_{nullptr}; ///< pointer to residual vector - ReSolve::vector::Vector* x_true_{nullptr}; ///< pointer to solution error vector - - ReSolve::real_type norm_rhs_{0.0}; ///< right-hand side vector norm - ReSolve::real_type norm_res_{0.0}; ///< residual vector norm - - real_type inf_norm_A_{0.0}; ///< infinity norm of matrix A - real_type inf_norm_x_{0.0}; ///< infinity norm of solution x - real_type inf_norm_res_{0.0}; ///< infinity norm of res = A*x - r - real_type nsr_norm_{0.0}; ///< norm of scaled residuals - - ReSolve::memory::MemorySpace memspace_{ReSolve::memory::HOST}; - std::string hardware_backend_{"NONE"}; + return error_sum; + } + + private: + /// Compute error norms. 
+ void computeNorms() + { + // Compute rhs and residual norms + res_->copyDataFrom(r_, memspace_, memspace_); + norm_rhs_ = norm2(*r_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + // Compute norm of scaled residuals + mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); + inf_norm_x_ = vh_.infNorm(x_, memspace_); + inf_norm_res_ = vh_.infNorm(res_, memspace_); + nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); + } + + /** + * @brief Computes residual norm = || A * x - r ||_2 + * + * @param[in] A - system matrix + * @param[in] x - computed solution of the system + * @param[in,out] r - system right-hand side, residual vector + * @param[in] memspace memory space where to compute the norm + * @return ReSolve::real_type + * + * @post r is overwritten with residual values + */ + ReSolve::real_type computeResidualNorm(ReSolve::matrix::Sparse& A, + ReSolve::vector::Vector& x, + ReSolve::vector::Vector& r, + ReSolve::memory::MemorySpace memspace) + { + using namespace ReSolve::constants; + mh_.matvec(&A, &x, &r, &ONE, &MINUS_ONE, memspace); // r := A * x - r + return norm2(r, memspace); + } + + /// Compute L2 norm of vector `r` in memory space `memspace`. 
+ ReSolve::real_type norm2(ReSolve::vector::Vector& r, + ReSolve::memory::MemorySpace memspace) + { + return std::sqrt(vh_.dot(&r, &r, memspace)); + } + + private: + ReSolve::matrix::Sparse* A_; ///< pointer to system matrix + ReSolve::vector::Vector* r_; ///< pointer to system right-hand side + ReSolve::vector::Vector* x_; ///< pointer to the computed solution + + ReSolve::MatrixHandler mh_; ///< matrix handler instance + ReSolve::VectorHandler vh_; ///< vector handler instance + + ReSolve::vector::Vector* res_{nullptr}; ///< pointer to residual vector + ReSolve::vector::Vector* x_true_{nullptr}; ///< pointer to solution error vector + + ReSolve::real_type norm_rhs_{0.0}; ///< right-hand side vector norm + ReSolve::real_type norm_res_{0.0}; ///< residual vector norm + + real_type inf_norm_A_{0.0}; ///< infinity norm of matrix A + real_type inf_norm_x_{0.0}; ///< infinity norm of solution x + real_type inf_norm_res_{0.0}; ///< infinity norm of res = A*x - r + real_type nsr_norm_{0.0}; ///< norm of scaled residuals + + ReSolve::memory::MemorySpace memspace_{ReSolve::memory::HOST}; + std::string hardware_backend_{"NONE"}; }; } // namespace examples diff --git a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp index ac3300b94..401c1ff3d 100644 --- a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp @@ -1,157 +1,173 @@ -#include -#include #include +#include +#include -#include -#include +#include +#include #include #include #include +#include +#include #include -#include -#include #include // this updates the matrix values to simulate what CFD/optimization software does. using namespace ReSolve::constants; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. 
- using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; - vector_type* vec_x = nullptr; - vector_type* vec_r = nullptr; + vector_type* vec_x = nullptr; + vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << 
"========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); - } else { - if (i==1) { + } + else + { + if (i == 1) + { A_exp = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); - } else { + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A_exp); } - std::cout<<"Updating values of A_coo!"<copyValues(A_exp->getValues(ReSolve::memory::HOST), ReSolve::memory::HOST, ReSolve::memory::HOST); - //ReSolve::io::updateMatrixFromFile(mat_file, A); + // ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 1) { + if (i < 1) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. 
Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status; - if (i < 1){ + if (i < 1) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<getLFactor(); matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} + if (L == nullptr) + { + printf("ERROR"); + } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); GLU->setup(A, L, U, P, Q); status = GLU->solve(vec_rhs, vec_x); - std::cout<<"GLU solve status: "<solve(vec_rhs, vec_x); // std::cout<<"KLU solve status: "<refactorize(); - std::cout<<"Using CUSOLVER GLU"<refactorize(); + std::cout << "Using CUSOLVER GLU" << std::endl; status = GLU->refactorize(); - std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER GLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); @@ -160,18 +176,18 @@ int main(int argc, char *argv[]) << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) << "\n"; } - //now DELETE - delete A; - delete A_exp; - delete KLU; - delete GLU; - delete [] x; - delete [] rhs; - delete vec_r; - delete vec_x; - delete workspace_CUDA; - delete matrix_handler; - delete vector_handler; - - return 0; - } + // now DELETE + delete A; + delete A_exp; + delete KLU; + delete GLU; + delete[] x; + delete[] rhs; + delete vec_r; + delete vec_x; + delete workspace_CUDA; + delete matrix_handler; + delete vector_handler; + + return 0; +} diff --git a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp index 76f4d5920..fb4854256 100644 --- a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp +++ 
b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp @@ -1,34 +1,34 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf(); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf(); real_type res_nrm = 0.0; - real_type b_nrm = 0.0; + real_type b_nrm = 0.0; // We need them. 
They hold a POINTER. Don't delete them here. KLU deletes them. ReSolve::matrix::Csc* L_csc; ReSolve::matrix::Csc* U_csc; - index_type* P; - index_type* Q; + index_type* P; + index_type* Q; - int status = 0; + int status = 0; int status_refactor = 0; for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_r = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. 
- if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver - if (i < 2) { + // Now call direct solver + if (i < 2) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<getLFactor(); - U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); + std::cout << "KLU solve status: " << status << std::endl; + if (i == 1) + { + L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); + U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); L_csc->syncData(ReSolve::memory::DEVICE); U_csc->syncData(ReSolve::memory::DEVICE); - matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); - matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); - if (L == nullptr) { + matrix_handler->csc2csr(L_csc, L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc, U, ReSolve::memory::DEVICE); + if (L == nullptr) + { std::cout << "ERROR\n"; } P = KLU->getPOrdering(); @@ -151,12 +162,14 @@ int main(int argc, char *argv[] ) delete L; delete U; } - } else { - std::cout<<"Using cusolver rf"<refactorize(); - std::cout<<"cusolver rf refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"cusolver rf solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -167,20 +180,24 @@ int main(int argc, char *argv[] ) b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t2-Norm of the residual: " << std::scientific << 
std::setprecision(16) - << res_nrm/b_nrm << "\n"; - if (((res_nrm/b_nrm > 1e-7 ) && (!std::isnan(res_nrm))) || (status_refactor != 0 )) { - if ((res_nrm/b_nrm > 1e-7 )) { + << res_nrm / b_nrm << "\n"; + if (((res_nrm / b_nrm > 1e-7) && (!std::isnan(res_nrm))) || (status_refactor != 0)) + { + if ((res_nrm / b_nrm > 1e-7)) + { std::cout << "\n \t !!! ALERT !!! Residual norm is too large; redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n \n"; - } else { + } + else + { std::cout << "\n \t !!! ALERT !!! cuSolverRf crashed; redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n \n"; } KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -190,10 +207,9 @@ int main(int argc, char *argv[] ) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); - std::cout <<"\t New residual norm: " + std::cout << "\t New residual norm: " << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - + << res_nrm / b_nrm << "\n"; L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); @@ -214,12 +230,12 @@ int main(int argc, char *argv[] ) } } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp index 0fd0c86a6..8f9a6b1e5 100644 --- a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ 
b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -1,36 +1,36 @@ -#include -#include #include +#include +#include #include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; @@ -52,87 +52,95 @@ int main(int argc, char *argv[]) ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::CGS2); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, 
vector_handler, GS); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + if (i == 0) + { + A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } - else { + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? 
" << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } + else + { A->syncData(ReSolve::memory::DEVICE); vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; // Now call direct solver - int status = 0; + int status = 0; real_type norm_b; - if (i < 2){ + if (i < 2) + { KLU->setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); @@ -140,31 +148,35 @@ int main(int argc, char *argv[]) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual : " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; - if (i == 1) { + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) / norm_b << "\n"; + if (i == 1) + { ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr* U = new 
ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); L_csc->syncData(ReSolve::memory::DEVICE); U_csc->syncData(ReSolve::memory::DEVICE); - matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); - matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); - if (L == nullptr) { + matrix_handler->csc2csr(L_csc, L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc, U, ReSolve::memory::DEVICE); + if (L == nullptr) + { std::cout << "ERROR\n"; } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); Rf->setup(A, L, U, P, Q); - std::cout<<"about to set FGMRES" <setRestart(1000); FGMRES->setMaxit(2000); FGMRES->setup(A); } - } else { - //status = KLU->refactorize(); - std::cout<<"Using CUSOLVER RF"<refactorize(); + std::cout << "Using CUSOLVER RF" << std::endl; if ((i % 2 == 0)) { status = Rf->refactorize(); @@ -174,12 +186,12 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); FGMRES->setupPreconditioner("LU", Rf); } - //if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); - real_type norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); + // if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); + real_type norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (before solve): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; - std::cout<<"CUSOLVER RF solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -193,7 +205,7 @@ int main(int argc, char *argv[]) std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) / norm_b << "\n"; std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " << std::scientific << std::setprecision(16) 
<< norm_b << "\n"; @@ -203,24 +215,22 @@ int main(int argc, char *argv[]) std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b + << FGMRES->getInitResidualNorm() / norm_b << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b + << FGMRES->getFinalResidualNorm() / norm_b << " iter: " << FGMRES->getNumIter() << "\n"; norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (after IR): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; } - - } delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp index 97ba3b9c2..61822df28 100644 --- a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp @@ -1,34 +1,34 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. 
- std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl; - std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; + std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; std::string fileId; std::string rhsId; @@ -39,17 +39,17 @@ int main(int argc, char *argv[] ) ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP; workspace_HIP->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); real_type res_nrm; real_type b_nrm; @@ -57,84 +57,96 @@ int main(int argc, char *argv[] ) for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = 
matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_r = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. 
Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status = 0; - if (i < 2){ + if (i < 2) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: " << status << std::endl; + std::cout << "KLU analysis status: " << status << std::endl; status = KLU->factorize(); std::cout << "KLU factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); std::cout << "KLU solve status: " << status << std::endl; - if (i == 1) { + if (i == 1) + { ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type* P = KLU->getPOrdering(); + index_type* Q = KLU->getQOrdering(); vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); Rf->setup(A, L, U, P, Q, vec_rhs); Rf->refactorize(); } - } else { + } + else + { std::cout << "Using rocsolver rf" << std::endl; status = Rf->refactorize(); std::cout << "rocsolver rf refactorization status: " << status << std::endl; @@ -147,55 +159,55 @@ int main(int argc, char *argv[] ) matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); - b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); + b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - if (!isnan(res_nrm)) { - if (res_nrm/b_nrm > 1e-7 ) { - std::cout << "\n \t !!! ALERT !!! Residual norm is too large; " - << "redoing KLU symbolic and numeric factorization. !!! ALERT !!! 
\n\n"; - - KLU->setup(A); - status = KLU->analyze(); - std::cout << "KLU analysis status: " << status << std::endl; - status = KLU->factorize(); - std::cout << "KLU factorization status: " << status << std::endl; - status = KLU->solve(vec_rhs, vec_x); - std::cout << "KLU solve status: " << status << std::endl; - - vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + << res_nrm / b_nrm << "\n"; + if (!isnan(res_nrm)) + { + if (res_nrm / b_nrm > 1e-7) + { + std::cout << "\n \t !!! ALERT !!! Residual norm is too large; " + << "redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n\n"; + + KLU->setup(A); + status = KLU->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = KLU->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + status = KLU->solve(vec_rhs, vec_x); + std::cout << "KLU solve status: " << status << std::endl; - matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - std::cout << "\t New residual norm: " - << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + std::cout << "\t New residual norm: " + << std::scientific << std::setprecision(16) + << res_nrm / b_nrm << "\n"; - ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); + 
ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); + ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type* P = KLU->getPOrdering(); + index_type* Q = KLU->getQOrdering(); - Rf->setup(A, L, U, P, Q, vec_rhs); - } + Rf->setup(A, L, U, P, Q, vec_rhs); + } } - } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_HIP; diff --git a/examples/gluRefactor.cpp b/examples/gluRefactor.cpp index 21454342f..419c65903 100644 --- a/examples/gluRefactor.cpp +++ b/examples/gluRefactor.cpp @@ -2,32 +2,32 @@ * @file gpuRefactor.cpp * @author Slaven Peles (peless@ornl.gov) * @author Kasia Swirydowicz (kasia.swirydowicz@amd.com) - * + * * @brief Example of solving linear systems using refactorization on a GPU. - * + * * A series of linear systems is read from files specified at command line * input and solved with refactorization approach on GPU. First system * is solved with KLU solver on CPU, using full factorization, and the * subsequent systems are solved with GLU solver on GPU, using refactorization * approach. It is assumed that all systems in the series have the same * sparsity pattern, so the analysis is done only once for the entire series. - * + * */ -#include -#include #include +#include #include +#include -#include +#include +#include #include -#include -#include +#include #include +#include +#include +#include #include -#include #include -#include -#include #ifdef RESOLVE_USE_CUDA #include @@ -52,10 +52,10 @@ void printHelpInfo() /// Prototype of the example function template -static int gluRefactor(int argc, char *argv[]); +static int gluRefactor(int argc, char* argv[]); /// Main function selects example to be run. 
-int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { #ifdef RESOLVE_USE_CUDA gluRefactor(argc, argv); @@ -75,26 +75,30 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int gluRefactor(int argc, char *argv[]) +int gluRefactor(int argc, char* argv[]) { using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -102,9 +106,12 @@ int gluRefactor(int argc, char *argv[]) std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -112,9 +119,12 @@ int gluRefactor(int argc, char *argv[]) std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -122,16 +132,19 @@ int gluRefactor(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family mtx file name: " << matrix_pathname + std::cout << "Family mtx file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; 
+ << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; // Create workspace workspace_type workspace; @@ -142,47 +155,51 @@ int gluRefactor(int argc, char *argv[]) std::cout << "gluRefactor with " << helper.getHardwareBackend() << " backend\n"; // Direct solvers instantiation - LinSolverDirectKLU KLU; + LinSolverDirectKLU KLU; LinSolverDirectCuSolverGLU Rf(&workspace); // Pointers to matrix and vectors defining the linear system - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; RESOLVE_RANGE_PUSH(__FUNCTION__); - for (int i = 0; i < num_systems; ++i) { + for (int i = 0; i < num_systems; ++i) + { std::cout << "System " << i << ":\n"; RESOLVE_RANGE_PUSH("File input"); std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." 
<< file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return -1; } bool is_expand_symmetric = true; - if (i == 0) { - A = io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); vec_x->allocate(memory::DEVICE); - } else { + } + else + { io::updateMatrixFromFile(mat_file, A); io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -200,7 +217,8 @@ int gluRefactor(int argc, char *argv[]) int status = 0; - if (i < 1) { + if (i < 1) + { RESOLVE_RANGE_PUSH("KLU"); // Setup factorization solver KLU.setup(A); @@ -220,7 +238,8 @@ int gluRefactor(int argc, char *argv[]) // Extract factors and configure refactorization solver matrix::Csc* L = (matrix::Csc*) KLU.getLFactor(); matrix::Csc* U = (matrix::Csc*) KLU.getUFactor(); - if (L == nullptr || U == nullptr) { + if (L == nullptr || U == nullptr) + { std::cout << "Factor extraction from KLU failed!\n"; } index_type* P = KLU.getPOrdering(); @@ -229,7 +248,9 @@ int gluRefactor(int argc, char *argv[]) status = Rf.setup(A, L, U, P, Q); RESOLVE_RANGE_POP("KLU"); - } else { + } + else + { RESOLVE_RANGE_PUSH("Refactorization"); // Refactorize on the device diff --git a/examples/gpuRefactor.cpp b/examples/gpuRefactor.cpp index f1b4e722d..ca11e7134 100644 --- a/examples/gpuRefactor.cpp +++ b/examples/gpuRefactor.cpp @@ -15,28 +15,29 @@ * entire series. 
* */ -#include -#include #include +#include #include +#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include #include +#include +#include +#include +#include #include +#include +#include +#include #ifdef RESOLVE_USE_CUDA - #include +#include #endif #ifdef RESOLVE_USE_HIP - #include +#include #endif #include "ExampleHelper.hpp" @@ -58,20 +59,20 @@ void printHelpInfo() /// Prototype of the example function template -static int gpuRefactor(int argc, char *argv[]); +static int gpuRefactor(int argc, char* argv[]); /// Main function selects example to be run. -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { - #ifdef RESOLVE_USE_CUDA - gpuRefactor(argc, argv); - #endif +#ifdef RESOLVE_USE_CUDA + gpuRefactor(argc, argv); +#endif - #ifdef RESOLVE_USE_HIP - gpuRefactor(argc, argv); - #endif +#ifdef RESOLVE_USE_HIP + gpuRefactor(argc, argv); +#endif return 0; } @@ -85,17 +86,18 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int gpuRefactor(int argc, char *argv[]) +int gpuRefactor(int argc, char* argv[]) { using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -103,10 +105,13 @@ int gpuRefactor(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -114,9 +119,12 @@ int gpuRefactor(int argc, char *argv[]) std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) 
{ + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -124,9 +132,12 @@ int gpuRefactor(int argc, char *argv[]) std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -134,16 +145,19 @@ int gpuRefactor(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family mtx file name: " << matrix_pathname + std::cout << "Family mtx file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; + << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; // Create workspace workspace_type workspace; @@ -159,50 +173,54 @@ int gpuRefactor(int argc, char *argv[]) // Direct solvers instantiation LinSolverDirectKLU KLU; - refactor_type Rf(&workspace); + refactor_type Rf(&workspace); // Iterative solver instantiation - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); // Pointers to matrix and vectors defining the linear system - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; RESOLVE_RANGE_PUSH(__FUNCTION__); - for (int i = 0; i < num_systems; ++i) { + for (int i = 0; i < num_systems; ++i) + { std::cout << "System " << i << ":\n"; RESOLVE_RANGE_PUSH("File input"); std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." 
<< file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return -1; } bool is_expand_symmetric = true; - if (i == 0) { - A = io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); vec_x->allocate(memory::DEVICE); - } else { + } + else + { io::updateMatrixFromFile(mat_file, A); io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -220,7 +238,8 @@ int gpuRefactor(int argc, char *argv[]) int status = 0; - if (i == 0) { + if (i == 0) + { RESOLVE_RANGE_PUSH("KLU"); // Setup factorization solver KLU.setup(A); @@ -231,7 +250,8 @@ int gpuRefactor(int argc, char *argv[]) std::cout << "KLU analysis status: " << status << std::endl; } - if (i < 2) { + if (i < 2) + { // Numeric factorization status = KLU.factorize(); std::cout << "KLU factorization status: " << status << std::endl; @@ -244,11 +264,13 @@ int gpuRefactor(int argc, char *argv[]) helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if (i == 1) { + if (i == 1) + { // Extract factors and configure refactorization solver matrix::Csc* L = (matrix::Csc*) KLU.getLFactor(); matrix::Csc* U = (matrix::Csc*) KLU.getUFactor(); - if (L == nullptr || U 
== nullptr) { + if (L == nullptr || U == nullptr) + { std::cout << "Factor extraction from KLU failed!\n"; } index_type* P = KLU.getPOrdering(); @@ -257,12 +279,15 @@ int gpuRefactor(int argc, char *argv[]) Rf.setup(A, L, U, P, Q, vec_rhs); // Setup iterative refinement solver - if (is_iterative_refinement) { + if (is_iterative_refinement) + { FGMRES.setup(A); } } RESOLVE_RANGE_POP("KLU"); - } else { + } + else + { std::cout << "Using refactorization\n"; RESOLVE_RANGE_PUSH("Refactorization"); @@ -278,13 +303,15 @@ int gpuRefactor(int argc, char *argv[]) helper.printSummary(); RESOLVE_RANGE_PUSH("Iterative refinement"); - if (is_iterative_refinement) { + if (is_iterative_refinement) + { // Setup iterative refinement FGMRES.resetMatrix(A); FGMRES.setupPreconditioner("LU", &Rf); // If refactorization produced finite solution do iterative refinement - if (std::isfinite(helper.getNormRelativeResidual())) { + if (std::isfinite(helper.getNormRelativeResidual())) + { FGMRES.solve(vec_rhs, vec_x); // Print summary diff --git a/examples/kluFactor.cpp b/examples/kluFactor.cpp index 8f1c3b587..07f615ecb 100644 --- a/examples/kluFactor.cpp +++ b/examples/kluFactor.cpp @@ -11,22 +11,23 @@ * pattern, so the analysis is done only once for the entire series. * */ -#include #include +#include #include +#include "ExampleHelper.hpp" +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include -#include #include -#include "ExampleHelper.hpp" -#include using namespace ReSolve::constants; @@ -45,17 +46,18 @@ void printHelpInfo() std::cout << "\t-i\tEnables iterative refinement.\n\n"; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. 
- using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; using namespace ReSolve::examples; using namespace ReSolve; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -63,10 +65,13 @@ int main(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = std::stoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -74,9 +79,12 @@ int main(int argc, char *argv[]) std::string matrix_path_name(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -84,9 +92,12 @@ int main(int argc, char *argv[]) std::string rhs_path_name(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -94,9 +105,12 @@ int main(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } @@ -105,17 +119,17 @@ int main(int argc, char *argv[]) std::string matrix_file_name_full; std::string rhs_file_name_full; - matrix::Csr* A = nullptr; - LinAlgWorkspaceCpu workspace; + matrix::Csr* A = nullptr; + LinAlgWorkspaceCpu workspace; ExampleHelper helper(workspace); - MatrixHandler matrix_handler(&workspace); - VectorHandler vector_handler(&workspace); + MatrixHandler matrix_handler(&workspace); + VectorHandler vector_handler(&workspace); vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; - 
LinSolverDirectKLU KLU; - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + LinSolverDirectKLU KLU; + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); for (int i = 0; i < num_systems; ++i) { @@ -124,28 +138,31 @@ int main(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; matrix_file_name_full = matname.str(); - rhs_file_name_full = rhsname.str(); + rhs_file_name_full = rhsname.str(); std::ifstream mat_file(matrix_file_name_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_file_name_full << "\n"; return 1; } std::ifstream rhs_file(rhs_file_name_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_file_name_full << "\n"; return 1; } bool is_expand_symmetric = true; - if (i == 0) { + if (i == 0) + { A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -153,31 +170,34 @@ int main(int argc, char *argv[]) mat_file.close(); rhs_file.close(); - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnz()<getNnz() << std::endl; + // Now call direct solver int status; - if (i==0) { + if (i == 0) + { vec_rhs->setDataUpdated(ReSolve::memory::HOST); KLU.setup(A); status = KLU.analyze(); - std::cout<<"KLU analysis status: "< #include +#include #include +#include "ExampleHelper.hpp" +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include -#include #include -#include - -#include "ExampleHelper.hpp" using namespace ReSolve::constants; @@ -46,17 +46,18 @@ void printHelpInfo() std::cout << "\t-i\tEnables iterative refinement.\n\n"; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; using namespace ReSolve::examples; using namespace ReSolve; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -64,10 +65,13 @@ int main(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = std::stoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -75,9 +79,12 @@ int main(int argc, char *argv[]) std::string matrix_path_name(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -85,9 +92,12 @@ int main(int argc, char *argv[]) std::string rhs_path_name(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect 
input!\n\n"; printHelpInfo(); return 1; @@ -95,9 +105,12 @@ int main(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } @@ -106,17 +119,17 @@ int main(int argc, char *argv[]) std::string matrix_file_name_full; std::string rhs_file_name_full; - matrix::Csr* A = nullptr; - LinAlgWorkspaceCpu workspace; + matrix::Csr* A = nullptr; + LinAlgWorkspaceCpu workspace; ExampleHelper helper(workspace); - MatrixHandler matrix_handler(&workspace); - VectorHandler vector_handler(&workspace); + MatrixHandler matrix_handler(&workspace); + VectorHandler vector_handler(&workspace); vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; - LinSolverDirectKLU* KLU = new LinSolverDirectKLU; - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + LinSolverDirectKLU* KLU = new LinSolverDirectKLU; + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); for (int i = 0; i < num_systems; ++i) { @@ -125,28 +138,31 @@ int main(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." 
<< file_extension; matrix_file_name_full = matname.str(); - rhs_file_name_full = rhsname.str(); + rhs_file_name_full = rhsname.str(); std::ifstream mat_file(matrix_file_name_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_file_name_full << "\n"; return 1; } std::ifstream rhs_file(rhs_file_name_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_file_name_full << "\n"; return 1; } bool is_expand_symmetric = true; - if (i == 0) { + if (i == 0) + { A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -155,19 +171,23 @@ int main(int argc, char *argv[]) rhs_file.close(); std::cout << "COO to CSR completed. 
Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status; - if (i==0) { + if (i == 0) + { vec_rhs->setDataUpdated(ReSolve::memory::HOST); KLU->setup(A); status = KLU->analyze(); std::cout << "KLU analysis status: " << status << std::endl; } - if (i < 2){ + if (i < 2) + { status = KLU->factorize(); std::cout << "KLU factorization status: " << status << std::endl; - } else { - status = KLU->refactorize(); + } + else + { + status = KLU->refactorize(); std::cout << "KLU re-factorization status: " << status << std::endl; } status = KLU->solve(vec_rhs, vec_x); @@ -175,13 +195,15 @@ int main(int argc, char *argv[]) helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if (is_iterative_refinement) { + if (is_iterative_refinement) + { // Setup iterative refinement FGMRES.setup(A); FGMRES.setupPreconditioner("LU", KLU); // If refactorization produced finite solution do iterative refinement - if (std::isfinite(helper.getNormRelativeResidual())) { + if (std::isfinite(helper.getNormRelativeResidual())) + { FGMRES.solve(vec_rhs, vec_x); // Print summary @@ -190,7 +212,7 @@ int main(int argc, char *argv[]) } } - //now DELETE + // now DELETE delete A; delete KLU; delete vec_rhs; diff --git a/examples/randGmres.cpp b/examples/randGmres.cpp index 4a1c46f2d..629cc2649 100644 --- a/examples/randGmres.cpp +++ b/examples/randGmres.cpp @@ -1,20 +1,19 @@ -#include -#include #include +#include +#include -#include -#include -#include -#include -#include +#include "ExampleHelper.hpp" #include -#include #include #include -#include +#include +#include +#include +#include #include - -#include "ExampleHelper.hpp" +#include +#include +#include #ifdef RESOLVE_USE_HIP #include @@ -35,9 +34,9 @@ static void printUsage() /// Prototype of the example main function template -static int runGmresExample(int argc, char *argv[]); +static int runGmresExample(int argc, char* argv[]); -int main(int argc, char *argv[]) +int main(int argc, 
char* argv[]) { int status = 0; @@ -62,7 +61,7 @@ int main(int argc, char *argv[]) /// Example implementation template -int runGmresExample(int argc, char *argv[]) +int runGmresExample(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. using namespace ReSolve; @@ -74,16 +73,20 @@ int runGmresExample(int argc, char *argv[]) ReSolve::CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printUsage(); return 1; } std::string matrix_pathname; - auto opt = options.getParamFromKey("-m"); - if (opt) { + auto opt = options.getParamFromKey("-m"); + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printUsage(); return 1; @@ -91,9 +94,12 @@ int runGmresExample(int argc, char *argv[]) std::string rhs_pathname; opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printUsage(); return 1; @@ -110,54 +116,56 @@ int runGmresExample(int argc, char *argv[]) GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); - precon_type Precond(&workspace); + precon_type Precond(&workspace); LinSolverIterativeRandFGMRES FGMRES(&matrix_handler, &vector_handler, LinSolverIterativeRandFGMRES::cs, &GS); // Set memory space where to run tests - std::string hwbackend = "CPU"; - memory::MemorySpace memspace = memory::HOST; - if (matrix_handler.getIsCudaEnabled()) { - memspace = memory::DEVICE; + std::string hwbackend = "CPU"; + memory::MemorySpace memspace = memory::HOST; + if (matrix_handler.getIsCudaEnabled()) + { + memspace = memory::DEVICE; hwbackend = "CUDA"; } - if (matrix_handler.getIsHipEnabled()) { - memspace = memory::DEVICE; + if (matrix_handler.getIsHipEnabled()) + { + memspace = memory::DEVICE; hwbackend = "HIP"; } - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; std::ifstream 
mat_file(matrix_pathname); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname << "\n"; return -1; } bool is_expand_symmetric = true; - A = io::createCsrFromFile(mat_file, is_expand_symmetric); - vec_rhs = io::createVectorFromFile(rhs_file); + A = io::createCsrFromFile(mat_file, is_expand_symmetric); + vec_rhs = io::createVectorFromFile(rhs_file); mat_file.close(); rhs_file.close(); vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memspace); - if (memspace == memory::DEVICE) { + if (memspace == memory::DEVICE) + { A->syncData(memspace); vec_rhs->syncData(memspace); } - printSystemInfo(matrix_pathname, A); matrix_handler.setValuesChanged(true, memspace); diff --git a/examples/resolve_consumer/CMakeLists.txt b/examples/resolve_consumer/CMakeLists.txt index 448685078..45aae031d 100644 --- a/examples/resolve_consumer/CMakeLists.txt +++ b/examples/resolve_consumer/CMakeLists.txt @@ -18,4 +18,4 @@ target_link_libraries(consume.exe PRIVATE ReSolve::ReSolve) enable_testing() # RESOLVE_DATA is set in test.sh and is the file path the matrix data files used in the testKLU_Rf_FGMRES -add_test(NAME resolve_consumer COMMAND $ "-d" "${RESOLVE_DATA}" "-i") \ No newline at end of file +add_test(NAME resolve_consumer COMMAND $ "-d" "${RESOLVE_DATA}" "-i") diff --git a/examples/resolve_consumer/README.md b/examples/resolve_consumer/README.md index 8131e94f1..d9021d01a 100644 --- a/examples/resolve_consumer/README.md +++ b/examples/resolve_consumer/README.md @@ -18,4 +18,4 @@ CI is ran per every merge request that makes sure ReSolve can be consumed as a p If you follow the [developer guidelines](CONTRIBUTING.md) for building resolve and run make test you will see ReSolve consumed and linked with an example test in Test #1 (resolve_Consume). 
-This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. \ No newline at end of file +This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. diff --git a/examples/sysRefactor.cpp b/examples/sysRefactor.cpp index e348a2c21..1cf3d48c5 100644 --- a/examples/sysRefactor.cpp +++ b/examples/sysRefactor.cpp @@ -1,24 +1,22 @@ -#include -#include -#include #include +#include +#include #include #include +#include "ExampleHelper.hpp" +#include #include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include #include -#include -#include - -#include "ExampleHelper.hpp" /// Prints help message describing system usage. void printHelpInfo() @@ -41,16 +39,17 @@ using namespace ReSolve::constants; /// Prototype of the example function template -static int sysRefactor(int argc, char *argv[]); +static int sysRefactor(int argc, char* argv[]); /// Main function selects example to be run. -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { ReSolve::CliOptions options(argc, argv); // If help flag is passed, print help message and return bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -98,18 +97,19 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int sysRefactor(int argc, char *argv[]) +int sysRefactor(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. 
using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -117,44 +117,56 @@ int sysRefactor(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family matrix file name: " << matrix_pathname + std::cout << "Family matrix file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; + << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; int status = 0; @@ -163,7 +175,7 @@ int sysRefactor(int argc, char *argv[]) // Create a helper object (computing errors, printing summaries, etc.) 
ExampleHelper helper(workspace); - std::string hw_backend = helper.getHardwareBackend(); + std::string hw_backend = helper.getHardwareBackend(); std::cout << "sysRefactor with " << hw_backend << " backend\n"; MatrixHandler matrix_handler(&workspace); @@ -176,11 +188,16 @@ int sysRefactor(int argc, char *argv[]) // Create system solver std::string refactor("none"); - if (hw_backend == "CUDA") { + if (hw_backend == "CUDA") + { refactor = "cusolverrf"; - } else if (hw_backend == "HIP") { + } + else if (hw_backend == "HIP") + { refactor = "rocsolverrf"; - } else { + } + else + { refactor = "klu"; } @@ -192,14 +209,17 @@ int sysRefactor(int argc, char *argv[]) "none"); // iterative refinement // Disable iterative refinement temporarily for CPU backend - if (hw_backend == "CPU") { + if (hw_backend == "CPU") + { is_iterative_refinement = false; } - if (is_iterative_refinement) { + if (is_iterative_refinement) + { solver.setRefinementMethod("fgmres", "cgs2"); solver.getIterativeSolver().setCliParam("restart", "100"); - if (hw_backend == "CUDA") { + if (hw_backend == "CUDA") + { solver.getIterativeSolver().setTol(1e-17); } } @@ -212,19 +232,19 @@ int sysRefactor(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." 
<< file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return 1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return 1; @@ -232,15 +252,19 @@ int sysRefactor(int argc, char *argv[]) // Refactorization is LU-based, so need to expand symmetric matrices bool is_expand_symmetric = true; - if (i == 0) { - A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); - if (hw_backend == "CUDA" || hw_backend == "HIP") { + if (hw_backend == "CUDA" || hw_backend == "HIP") + { vec_x->allocate(memory::DEVICE); } - } else { + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -249,7 +273,8 @@ int sysRefactor(int argc, char *argv[]) rhs_file.close(); // Ensure matrix data is synced to the device before any GPU operations - if (hw_backend == "CUDA" || hw_backend == "HIP") { + if (hw_backend == "CUDA" || hw_backend == "HIP") + { A->syncData(memory::DEVICE); vec_rhs->syncData(memory::DEVICE); } @@ -258,10 +283,12 @@ int sysRefactor(int argc, char *argv[]) printSystemInfo(matrix_pathname_full, A); // Now call direct solver - if (i == 0) { + if (i == 0) + { // Set matrix in solver after the initial matrix is loaded status = solver.setMatrix(A); - if (status != 0) { + if (status != 0) + { std::cout << "Failed to set matrix in solver. 
Status: " << status << std::endl; return 1; } @@ -273,7 +300,9 @@ int sysRefactor(int argc, char *argv[]) // Numeric factorization on the host status = solver.factorize(); std::cout << "Numeric factorization on the host status: " << status << std::endl; - } else if (i == 1) { + } + else if (i == 1) + { // Numeric factorization on the host status = solver.factorize(); std::cout << "Numeric factorization on the host status: " << status << std::endl; @@ -281,8 +310,9 @@ int sysRefactor(int argc, char *argv[]) // Set up refactorization solver status = solver.refactorizationSetup(); std::cout << "Refactorization setup status: " << status << std::endl; - - } else { + } + else + { // Refactorize on the device status = solver.refactorize(); std::cout << "Refactorization on the device status: " << status << std::endl; @@ -294,7 +324,8 @@ int sysRefactor(int argc, char *argv[]) // Print summary of results helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if ((i > 1) && is_iterative_refinement) { + if ((i > 1) && is_iterative_refinement) + { helper.printIrSummary(&(solver.getIterativeSolver())); } } diff --git a/renderDocs.sh b/renderDocs.sh index 08475584e..8a0e4770a 100755 --- a/renderDocs.sh +++ b/renderDocs.sh @@ -8,4 +8,4 @@ sphinx-build . 
./sphinx/_build && doxygen ./doxygen/Doxyfile.in && # Host the docs locally with python python3 -m http.server --directory ./sphinx/_build -popd \ No newline at end of file +popd diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index b7b5e67a4..5dab92a49 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -1,6 +1,6 @@ #[[ -@brief Build ReSolve library +@brief Build Re::Solve library @author Slaven Peles diff --git a/resolve/Common.hpp b/resolve/Common.hpp index 9b467c3ad..af30adadb 100644 --- a/resolve/Common.hpp +++ b/resolve/Common.hpp @@ -2,36 +2,37 @@ #include -//TODO: temporary +// TODO: temporary #include -namespace ReSolve { +namespace ReSolve +{ /// @todo Provide CMake option to se these types at config time - using real_type = double; + using real_type = double; using index_type = std::int32_t; namespace constants { - constexpr real_type ZERO = 0.0; - constexpr real_type ONE = 1.0; - constexpr real_type TWO = 2.0; - constexpr real_type HALF = 0.5; + constexpr real_type ZERO = 0.0; + constexpr real_type ONE = 1.0; + constexpr real_type TWO = 2.0; + constexpr real_type HALF = 0.5; constexpr real_type MINUS_ONE = -1.0; constexpr real_type MACHINE_EPSILON = std::numeric_limits::epsilon(); - } + } // namespace constants namespace colors { // must be const pointer and const dest for // const string declarations to pass -Wwrite-strings - static const char * const RED = "\033[1;31m"; - static const char * const GREEN = "\033[1;32m"; - static const char * const YELLOW = "\033[33;1m"; - static const char * const BLUE = "\033[34;1m"; - static const char * const ORANGE = "\u001b[38;5;208m"; - static const char * const CLEAR = "\033[0m"; - } + static const char* const RED = "\033[1;31m"; + static const char* const GREEN = "\033[1;32m"; + static const char* const YELLOW = "\033[33;1m"; + static const char* const BLUE = "\033[34;1m"; + static const char* const ORANGE = "\u001b[38;5;208m"; + static const char* const CLEAR = "\033[0m"; + } // 
namespace colors } // namespace ReSolve diff --git a/resolve/Doxygen.hpp b/resolve/Doxygen.hpp index 905c05595..2a5eea857 100644 --- a/resolve/Doxygen.hpp +++ b/resolve/Doxygen.hpp @@ -1,31 +1,31 @@ /** * @file Doxygen.hpp * @author Slaven Peles (peless@ornl.gov) - * @brief - * + * @brief + * * @mainpage ReSolve Source Code Documentation - * + * * ReSolve is a library of GPU-resident linear solvers. It contains iterative * and direct linear solvers designed to run on NVIDIA and AMD GPUs, as well as * on CPU devices. This is the main page of source code documentation intended * for developers who want to contribute to ReSolve code. General documentation * is available at readthedocs. The * ReSolve project is hosted on GitHub. - * - * + * + * * @section name_sec Name - * + * * Linear solvers are typically used within an application where a series of * systems with same sparsity pattern is solved one after another, such as in * the case of dynamic simulations or optimization. An efficient linear solver * design will _re-solve_ systems with the same sparsity pattern while reusing * symbolic operations and memory allocations from the prior systems, therefore * the name ReSolve. - * + * * @section history_sec History - * + * * The development of Re::Solve sparse linear solver library started as a part - * of Stochastic Grid Dynamics at Exascale + * of Stochastic Grid Dynamics at Exascale * (ExaSGD) * subproject of the Exascale Computing Project * (ECP). The overarching @@ -48,19 +48,19 @@ * pattern without unnecessary recomputation and re-allocations, easy to * integrate with applications, and capable of running on both AMD and NVIDIA * GPUs. 
- * + * * @section design_sec Code Design and Organization - * + * * @subsection solvers_subsec Solvers - * + * * @subsection matvecs_subsec Matrix and Vector Classes - * + * * @subsection handlers_subsec Matrix and Vector Handlers - * + * * @subsection workspaces_subsec Workspaces - * + * * @subsection backends_subsec Hardware Backends - * + * * @subsection utils_subsec Utilities - * + * */ diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index a9268273e..725a0d1d0 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -1,10 +1,11 @@ -#include +#include "GramSchmidt.hpp" + #include #include +#include #include #include -#include "GramSchmidt.hpp" namespace ReSolve { @@ -12,24 +13,28 @@ namespace ReSolve index_type idxmap(index_type i, index_type j, index_type col_length) { - return i * (col_length) + j; + return i * (col_length) + j; } - GramSchmidt::GramSchmidt(VectorHandler* vh, GSVariant variant) + GramSchmidt::GramSchmidt(VectorHandler* vh, GSVariant variant) : variant_(variant), setup_complete_(false), vector_handler_(vh) { - if (vector_handler_->getIsCudaEnabled() || vector_handler_->getIsHipEnabled()) { + if (vector_handler_->getIsCudaEnabled() || vector_handler_->getIsHipEnabled()) + { memspace_ = memory::DEVICE; - } else { + } + else + { memspace_ = memory::HOST; } } GramSchmidt::~GramSchmidt() { - if (setup_complete_) { + if (setup_complete_) + { freeGramSchmidtData(); } } @@ -45,12 +50,14 @@ namespace ReSolve int GramSchmidt::setVariant(GSVariant variant) { // If the same variant is already set, do nothing. - if(variant == variant_) { + if (variant == variant_) + { return 0; } // If Gram-Scmidt data is not allocated, just set the variant and exit. 
- if (!setup_complete_) { + if (!setup_complete_) + { variant_ = variant; return 0; } @@ -85,8 +92,10 @@ namespace ReSolve int GramSchmidt::setup(index_type n, index_type restart) { - if (setup_complete_) { - if ((vec_v_->getSize() != n) || (num_vecs_ != restart)) { + if (setup_complete_) + { + if ((vec_v_->getSize() != n) || (num_vecs_ != restart)) + { freeGramSchmidtData(); } } @@ -96,8 +105,9 @@ namespace ReSolve vec_x_ = new vector_type(n, 2); // n x 2 multivector view num_vecs_ = restart; - if((variant_ == MGS_TWO_SYNC) || (variant_ == MGS_PM)) { - h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)](); + if ((variant_ == MGS_TWO_SYNC) || (variant_ == MGS_PM)) + { + h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)](); vec_rv_ = new vector_type(num_vecs_ + 1, 2); vec_rv_->allocate(memspace_); @@ -107,18 +117,21 @@ namespace ReSolve vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == CGS2) { - h_aux_ = new real_type[num_vecs_ + 1](); + if (variant_ == CGS2) + { + h_aux_ = new real_type[num_vecs_ + 1](); vec_Hcolumn_ = new vector_type(num_vecs_ + 1); vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == CGS1) { + if (variant_ == CGS1) + { vec_Hcolumn_ = new vector_type(num_vecs_ + 1); vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == MGS_PM) { + if (variant_ == MGS_PM) + { h_aux_ = new real_type[num_vecs_ + 1](); } @@ -130,236 +143,267 @@ namespace ReSolve { using namespace constants; - double t = 0.0; - double s = 0.0; + double t = 0.0; + double s = 0.0; real_type* h_rv = nullptr; - switch (variant_) { - case MGS: - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - for(int j = 0; j <= i; ++j) { - t = 0.0; - vec_v_->setData( V->getData(j, memspace_), memspace_); - t = vector_handler_->dot(vec_v_, vec_w_, memspace_); - H[ idxmap(i, j, num_vecs_ + 1) ] = t; - t *= -1.0; - vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); - } - t = 0.0; - t = 
vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_w_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; - } - return 0; - - case CGS2: - vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to aux, we will need it later - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataTo(h_aux_, 0, memory::HOST); - mem_.deviceSynchronize(); - - //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - mem_.deviceSynchronize(); - - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to H - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->copyDataTo(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - mem_.deviceSynchronize(); - - // add both pieces together (unstable otherwise, careful here!!) 
+ switch (variant_) + { + case MGS: + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + for (int j = 0; j <= i; ++j) + { t = 0.0; - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; - } + vec_v_->setData(V->getData(j, memspace_), memspace_); + t = vector_handler_->dot(vec_v_, vec_w_, memspace_); + H[idxmap(i, j, num_vecs_ + 1)] = t; + t *= -1.0; + vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); + } + t = 0.0; + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; - t = vector_handler_->dot(vec_v_, vec_v_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_v_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; - } - return 0; - - case MGS_TWO_SYNC: - // V[1:i]^T[V[i] w] - vec_x_->setData(V->getData(i, memspace_), memspace_); - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - vec_rv_->resize(i + 1); - - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); - vec_rv_->setDataUpdated(memspace_); - if (memspace_ == memory::DEVICE) { - vec_rv_->syncData(memory::HOST); - } + case CGS2: + vec_v_->setData(V->getData(i + 1, memspace_), memspace_); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to aux, we will need it later + vec_Hcolumn_->setDataUpdated(memspace_); + 
vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataTo(h_aux_, 0, memory::HOST); + mem_.deviceSynchronize(); + + // Hcol = V(:,1:i)^T*V(:,i+1); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + mem_.deviceSynchronize(); + + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to H + vec_Hcolumn_->setDataUpdated(memspace_); + vec_Hcolumn_->copyDataTo(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + mem_.deviceSynchronize(); + + // add both pieces together (unstable otherwise, careful here!!) + t = 0.0; + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; + } - vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv = vec_rv_->getData(1, memory::HOST); + t = vector_handler_->dot(vec_v_, vec_v_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } - // triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataFrom(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); - - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1)] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); - for (int ii=0; ii<=i; ++ii) - { - vec_v_->setData(V->getData(ii, memspace_), memspace_); - 
vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - } - } else { - assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); - return 1; - } - h_rv = nullptr; - return 0; - - case MGS_PM: - vec_x_->setData(V->getData(i, memspace_), memspace_); - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - vec_rv_->resize(i + 1); - - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); - vec_rv_->setDataUpdated(memspace_); - if (memspace_ == memory::DEVICE) { - vec_rv_->syncData(memory::HOST); - } + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_v_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; - vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv = vec_rv_->getData(1, memory::HOST); + case MGS_TWO_SYNC: + // V[1:i]^T[V[i] w] + vec_x_->setData(V->getData(i, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + vec_rv_->resize(i + 1); + + vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vec_rv_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_rv_->syncData(memory::HOST); + } - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } + vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv = vec_rv_->getData(1, memory::HOST); - //triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - - // now compute h_rv = L^T h_H - double h; - for(int j = 0; j <= i; ++j) { - // go through COLUMN OF L - h_rv[j] = 0.0; - for(int k = j + 1; k <= i; ++k) { - h = h_L_[ idxmap(k, j, num_vecs_ + 1)]; - h_rv[j] += H[ idxmap(i, k, num_vecs_ + 1) ] * h; - } + for 
(int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + // triangular solve + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataFrom(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); + vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + for (int ii = 0; ii <= i; ++ii) + { + vec_v_->setData(V->getData(ii, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); } + } + else + { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); + return 1; + } + h_rv = nullptr; + return 0; - // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv - for(int j = 0; j <= i; ++j) { - h_aux_[j] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * h_aux_[k]; - } // for k - h_aux_[j] -= s; - } // for j - - // and now subtract that from h_H - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] -= h_aux_[j]; - } + case MGS_PM: + vec_x_->setData(V->getData(i, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + vec_rv_->resize(i + 1); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataFrom(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in 
Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); - return 1; - } - h_rv = nullptr; - return 0; - - case CGS1: - vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to H - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataTo(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - mem_.deviceSynchronize(); - - t = vector_handler_->dot(vec_v_, vec_v_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_v_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; + vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vec_rv_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_rv_->syncData(memory::HOST); + } + + vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv = vec_rv_->getData(1, memory::HOST); + + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + + // triangular solve + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + + // now compute h_rv = L^T h_H + double h; + for (int j = 0; j <= i; ++j) + { + // 
go through COLUMN OF L + h_rv[j] = 0.0; + for (int k = j + 1; k <= i; ++k) + { + h = h_L_[idxmap(k, j, num_vecs_ + 1)]; + h_rv[j] += H[idxmap(i, k, num_vecs_ + 1)] * h; } - return 0; + } + + // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv + for (int j = 0; j <= i; ++j) + { + h_aux_[j] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * h_aux_[k]; + } // for k + h_aux_[j] -= s; + } // for j + + // and now subtract that from h_H + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] -= h_aux_[j]; + } - default: - assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataFrom(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); + + vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + } + else + { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); return 1; - } //switch + } + h_rv = nullptr; + return 0; + + case CGS1: + vec_v_->setData(V->getData(i + 1, memspace_), memspace_); + // Hcol = V(:,1:i)^T*V(:,i+1); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to H + vec_Hcolumn_->setDataUpdated(memspace_); + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataTo(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + mem_.deviceSynchronize(); + + t = vector_handler_->dot(vec_v_, vec_v_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + 
H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_v_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; + + default: + assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); + return 1; + } // switch return 0; } // int orthogonalize() @@ -370,7 +414,8 @@ namespace ReSolve int GramSchmidt::freeGramSchmidtData() { - if(variant_ == MGS_TWO_SYNC || variant_ == MGS_PM) { + if (variant_ == MGS_TWO_SYNC || variant_ == MGS_PM) + { delete[] h_L_; h_L_ = nullptr; @@ -380,19 +425,22 @@ namespace ReSolve vec_Hcolumn_ = nullptr; } - if (variant_ == CGS2) { + if (variant_ == CGS2) + { delete[] h_aux_; h_aux_ = nullptr; delete vec_Hcolumn_; vec_Hcolumn_ = nullptr; } - if (variant_ == CGS1) { + if (variant_ == CGS1) + { delete vec_Hcolumn_; vec_Hcolumn_ = nullptr; } - if (variant_ == MGS_PM) { + if (variant_ == MGS_PM) + { delete[] h_aux_; h_aux_ = nullptr; } @@ -406,5 +454,4 @@ namespace ReSolve return 0; } - } // namespace ReSolve diff --git a/resolve/GramSchmidt.hpp b/resolve/GramSchmidt.hpp index 062c3de32..1a366cca9 100644 --- a/resolve/GramSchmidt.hpp +++ b/resolve/GramSchmidt.hpp @@ -1,57 +1,60 @@ #pragma once -#include #include +#include #include "Common.hpp" -#include #include +#include -namespace ReSolve +namespace ReSolve { class GramSchmidt { - private: - using vector_type = vector::Vector; - - public: - enum GSVariant {MGS = 0, - CGS2, - MGS_TWO_SYNC, - MGS_PM, - CGS1}; - - GramSchmidt() = delete; - GramSchmidt(VectorHandler* vh, GSVariant variant); - ~GramSchmidt(); - int setVariant(GramSchmidt::GSVariant variant); - GSVariant getVariant(); - real_type* getL(); //only for low synch, returns null ptr otherwise - - int setup(index_type n, index_type restart); - int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i); - bool isSetupComplete(); - - private: - int freeGramSchmidtData(); - - 
GSVariant variant_{MGS}; - bool setup_complete_{false}; //to avoid double allocations - - index_type num_vecs_; //the same as restart - vector_type* vec_rv_{nullptr}; - vector_type* vec_Hcolumn_{nullptr}; - - real_type* h_L_{nullptr}; - real_type* h_aux_{nullptr}; - VectorHandler* vector_handler_{nullptr}; - - vector_type* vec_v_{nullptr}; // aux variable - vector_type* vec_w_{nullptr}; // aux variable - vector_type* vec_x_{nullptr}; // aux variable - - MemoryHandler mem_; ///< Device memory manager object - memory::MemorySpace memspace_; + private: + using vector_type = vector::Vector; + + public: + enum GSVariant + { + MGS = 0, + CGS2, + MGS_TWO_SYNC, + MGS_PM, + CGS1 + }; + + GramSchmidt() = delete; + GramSchmidt(VectorHandler* vh, GSVariant variant); + ~GramSchmidt(); + int setVariant(GramSchmidt::GSVariant variant); + GSVariant getVariant(); + real_type* getL(); // only for low synch, returns null ptr otherwise + + int setup(index_type n, index_type restart); + int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i); + bool isSetupComplete(); + + private: + int freeGramSchmidtData(); + + GSVariant variant_{MGS}; + bool setup_complete_{false}; // to avoid double allocations + + index_type num_vecs_; // the same as restart + vector_type* vec_rv_{nullptr}; + vector_type* vec_Hcolumn_{nullptr}; + + real_type* h_L_{nullptr}; + real_type* h_aux_{nullptr}; + VectorHandler* vector_handler_{nullptr}; + + vector_type* vec_v_{nullptr}; // aux variable + vector_type* vec_w_{nullptr}; // aux variable + vector_type* vec_x_{nullptr}; // aux variable + + MemoryHandler mem_; ///< Device memory manager object + memory::MemorySpace memspace_; }; } // namespace ReSolve diff --git a/resolve/LinSolver.cpp b/resolve/LinSolver.cpp index 75bc15b6d..5791e691b 100644 --- a/resolve/LinSolver.cpp +++ b/resolve/LinSolver.cpp @@ -3,16 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of 
linear solver base class. - * + * */ -#include -#include - #include "LinSolver.hpp" +#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -22,25 +21,23 @@ namespace ReSolve LinSolver::~LinSolver() { - //destroy the matrix and hadlers + // destroy the matrix and hadlers } real_type LinSolver::evaluateResidual() { - //to be implemented + // to be implemented return 1.0; } int LinSolver::getParamId(std::string id) const { auto it = params_list_.find(id); - if (it == params_list_.end()) { + if (it == params_list_.end()) + { out::error() << "Unknown parameter " << id << ".\n"; return 999; } return (*it).second; } -} - - - +} // namespace ReSolve diff --git a/resolve/LinSolver.hpp b/resolve/LinSolver.hpp index 556b1412d..efc3dcc51 100644 --- a/resolve/LinSolver.hpp +++ b/resolve/LinSolver.hpp @@ -3,7 +3,7 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of linear solver base class. - * + * */ #pragma once @@ -12,7 +12,7 @@ #include "Common.hpp" -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -32,38 +32,37 @@ namespace ReSolve // Forward declaration of MatrixHandler class class MatrixHandler; - /** * @brief Base class for all linear solvers. 
- * + * */ - class LinSolver + class LinSolver { - protected: - using vector_type = vector::Vector; + protected: + using vector_type = vector::Vector; + + public: + LinSolver(); + virtual ~LinSolver(); - public: - LinSolver(); - virtual ~LinSolver(); + real_type evaluateResidual(); - real_type evaluateResidual(); + virtual int setCliParam(const std::string /* id */, const std::string /* value */) = 0; + virtual std::string getCliParamString(const std::string /* id */) const = 0; + virtual index_type getCliParamInt(const std::string /* id */) const = 0; + virtual real_type getCliParamReal(const std::string /* id */) const = 0; + virtual bool getCliParamBool(const std::string /* id */) const = 0; + virtual int printCliParam(const std::string /* id */) const = 0; - virtual int setCliParam(const std::string /* id */, const std::string /* value */) = 0; - virtual std::string getCliParamString(const std::string /* id */) const = 0; - virtual index_type getCliParamInt(const std::string /* id */) const = 0; - virtual real_type getCliParamReal(const std::string /* id */) const = 0; - virtual bool getCliParamBool(const std::string /* id */) const = 0; - virtual int printCliParam(const std::string /* id */) const = 0; - - protected: - int getParamId(std::string id) const; + protected: + int getParamId(std::string id) const; - matrix::Sparse* A_{nullptr}; + matrix::Sparse* A_{nullptr}; - MatrixHandler* matrix_handler_{nullptr}; - VectorHandler* vector_handler_{nullptr}; + MatrixHandler* matrix_handler_{nullptr}; + VectorHandler* vector_handler_{nullptr}; - std::map params_list_; + std::map params_list_; }; } // namespace ReSolve diff --git a/resolve/LinSolverDirect.cpp b/resolve/LinSolverDirect.cpp index 6eaf64ee8..b11370953 100644 --- a/resolve/LinSolverDirect.cpp +++ b/resolve/LinSolverDirect.cpp @@ -3,16 +3,17 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of direct solver base class. 
- * + * */ +#include #include #include -#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirect class. * @@ -37,27 +38,29 @@ namespace ReSolve * @brief Setup function for LinSolverDirect class. * * @param[in] A - matrix to be solved - * @param[in] L - optional lower triangular factor + * @param[in] L - optional lower triangular factor * @param[in] U - optional upper triangular factor * @param[in] P - optional row permutation vector * @param[in] Q - optional column permutation vector * @param[in] rhs - optional right-hand side vector - * + * * @return int - error code, 0 if successful */ int LinSolverDirect::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { - if (A == nullptr) { + if (A == nullptr) + { return 1; } A_ = A; return 0; } + /** * @brief Placeholder function for symbolic factorization. */ @@ -65,6 +68,7 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for numeric factorization. */ @@ -72,6 +76,7 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for refactorization. */ @@ -79,34 +84,35 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for lower triangular factor. */ matrix::Sparse* LinSolverDirect::getLFactor() { return nullptr; - } - + } + /** * @brief Placeholder function for upper triangular factor. */ matrix::Sparse* LinSolverDirect::getUFactor() { return nullptr; - } - + } + /** * @brief Placeholder function for row permutation vector. */ - index_type* LinSolverDirect::getPOrdering() + index_type* LinSolverDirect::getPOrdering() { return nullptr; - } - + } + /** * @brief Placeholder function for column permutation vector. 
*/ - index_type* LinSolverDirect::getQOrdering() + index_type* LinSolverDirect::getQOrdering() { return nullptr; } diff --git a/resolve/LinSolverDirect.hpp b/resolve/LinSolverDirect.hpp index 7623f074c..eefb75c2e 100644 --- a/resolve/LinSolverDirect.hpp +++ b/resolve/LinSolverDirect.hpp @@ -3,44 +3,44 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of direct solver base class. - * + * */ #pragma once #include + #include -namespace ReSolve +namespace ReSolve { - class LinSolverDirect : public LinSolver + class LinSolverDirect : public LinSolver { - public: - LinSolverDirect(); - virtual ~LinSolverDirect(); - virtual int setup(matrix::Sparse* A = nullptr, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr); + public: + LinSolverDirect(); + virtual ~LinSolverDirect(); + virtual int setup(matrix::Sparse* A = nullptr, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr); - virtual int analyze(); //the same as symbolic factorization - virtual int factorize(); - virtual int refactorize(); - virtual int solve(vector_type* rhs, vector_type* x) = 0; - virtual int solve(vector_type* x) = 0; - - virtual matrix::Sparse* getLFactor(); - virtual matrix::Sparse* getUFactor(); - virtual index_type* getPOrdering(); - virtual index_type* getQOrdering(); + virtual int analyze(); // the same as symbolic factorization + virtual int factorize(); + virtual int refactorize(); + virtual int solve(vector_type* rhs, vector_type* x) = 0; + virtual int solve(vector_type* x) = 0; - protected: - matrix::Sparse* L_{nullptr}; - matrix::Sparse* U_{nullptr}; - index_type* P_{nullptr}; - index_type* Q_{nullptr}; + virtual matrix::Sparse* getLFactor(); + virtual matrix::Sparse* getUFactor(); + virtual index_type* getPOrdering(); + virtual 
index_type* getQOrdering(); + protected: + matrix::Sparse* L_{nullptr}; + matrix::Sparse* U_{nullptr}; + index_type* P_{nullptr}; + index_type* Q_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCpuILU0.cpp b/resolve/LinSolverDirectCpuILU0.cpp index 92f6b289e..cffb2647b 100644 --- a/resolve/LinSolverDirectCpuILU0.cpp +++ b/resolve/LinSolverDirectCpuILU0.cpp @@ -2,42 +2,43 @@ * @file LinSolverDirectCpuILU0.cpp * @author Slaven Peles (peless@ornl.gov) * @brief Contains definition of a class for incomplete LU factorization on CPU - * - * + * + * */ +#include "LinSolverDirectCpuILU0.hpp" + #include -#include #include -#include #include +#include +#include -#include "LinSolverDirectCpuILU0.hpp" - -namespace ReSolve +namespace ReSolve { using out = io::Logger; LinSolverDirectCpuILU0::LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* /* workspace */) - // : workspace_(workspace) + // : workspace_(workspace) { } /** * @brief Destructor - * + * * @todo Address how L and U factors are deleted (currently base class does that). 
*/ LinSolverDirectCpuILU0::~LinSolverDirectCpuILU0() { - if (owns_factors_) { + if (owns_factors_) + { delete L_; delete U_; L_ = nullptr; U_ = nullptr; } - delete [] diagU_; - delete [] idxmap_; + delete[] diagU_; + delete[] idxmap_; } int LinSolverDirectCpuILU0::setup(matrix::Sparse* A, @@ -45,12 +46,12 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - int error_sum = 0; - A_ = dynamic_cast(A); - error_sum += analyze(); - error_sum += factorize(); + int error_sum = 0; + A_ = dynamic_cast(A); + error_sum += analyze(); + error_sum += factorize(); return error_sum; } @@ -64,27 +65,33 @@ namespace ReSolve A_ = dynamic_cast(A); index_type* rowsL = L_->getRowData(HOST); - real_type* valsL = L_->getValues(HOST); + real_type* valsL = L_->getValues(HOST); index_type* rowsU = U_->getRowData(HOST); index_type* colsU = U_->getColData(HOST); - real_type* valsU = U_->getValues(HOST); + real_type* valsU = U_->getValues(HOST); const index_type* colsA = A_->getColData(HOST); - const real_type* valsA = A_->getValues(HOST); + const real_type* valsA = A_->getValues(HOST); // Update values in L and U factors - const index_type N = A_->getNumRows(); - index_type acount = 0; - for (index_type i = 0; i < N; ++i) { - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { - valsL[j] = valsA[acount]; - ++acount; + const index_type N = A_->getNumRows(); + index_type acount = 0; + for (index_type i = 0; i < N; ++i) + { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { + valsL[j] = valsA[acount]; + ++acount; } - for (index_type j = rowsU[i]; j < rowsU[i+1]; ++j) { - if ((colsU[j] == i) && (colsA[acount] != i)) { + for (index_type j = rowsU[i]; j < rowsU[i + 1]; ++j) + { + if ((colsU[j] == i) && (colsA[acount] != i)) + { valsU[j] = zero_diagonal_; - } else { + } + else + { valsU[j] = valsA[acount]; ++acount; } @@ -101,7 +108,7 @@ namespace ReSolve using namespace memory; int error_sum = 0; - const index_type N = A_->getNumRows(); + 
const index_type N = A_->getNumRows(); const index_type* rowsA = A_->getRowData(memory::HOST); const index_type* colsA = A_->getColData(memory::HOST); const real_type* valsA = A_->getValues(memory::HOST); @@ -120,23 +127,32 @@ namespace ReSolve // Find number of nonzeros and row pointers for L and U factors bool has_diagonal = false; - for (index_type i = 0; i < N; ++i) { + for (index_type i = 0; i < N; ++i) + { rowsL[i] = nnzL; rowsU[i] = nnzU; - for (index_type j = rowsA[i]; j < rowsA[i+1]; ++j) { - if (colsA[j] < i) { + for (index_type j = rowsA[i]; j < rowsA[i + 1]; ++j) + { + if (colsA[j] < i) + { nnzL++; - } else { - if (colsA[j] == i) { + } + else + { + if (colsA[j] == i) + { has_diagonal = true; - diagU_[i] = valsA[j] < zero_diagonal_ ? zero_diagonal_ : valsA[j]; + diagU_[i] = valsA[j] < zero_diagonal_ ? zero_diagonal_ : valsA[j]; } nnzU++; } } - if (has_diagonal) { + if (has_diagonal) + { has_diagonal = false; - } else { + } + else + { nnzU++; diagU_[i] = zero_diagonal_; } @@ -146,23 +162,27 @@ namespace ReSolve index_type* colsL = new index_type[nnzL]; index_type* colsU = new index_type[nnzU]; - real_type* valsL = new real_type[nnzL]; - real_type* valsU = new real_type[nnzU]; + real_type* valsL = new real_type[nnzL]; + real_type* valsU = new real_type[nnzU]; // Set data for L and U index_type lcount = 0; - index_type ucount = 0; - for (index_type i = 0; i < N; ++i) { + index_type ucount = 0; + for (index_type i = 0; i < N; ++i) + { colsU[ucount] = i; valsU[ucount] = diagU_[i]; ++ucount; - for (index_type j = rowsA[i]; j < rowsA[i+1]; ++j) { - if (colsA[j] < i) { + for (index_type j = rowsA[i]; j < rowsA[i + 1]; ++j) + { + if (colsA[j] < i) + { colsL[lcount] = colsA[j]; valsL[lcount] = valsA[j]; ++lcount; - } - if (colsA[j] > i) { + } + if (colsA[j] > i) + { colsU[ucount] = colsA[j]; valsU[ucount] = valsA[j]; ++ucount; @@ -191,42 +211,47 @@ namespace ReSolve index_type* rowsL = L_->getRowData(HOST); index_type* colsL = L_->getColData(HOST); - real_type* 
valsL = L_->getValues(HOST); + real_type* valsL = L_->getValues(HOST); index_type* rowsU = U_->getRowData(HOST); index_type* colsU = U_->getColData(HOST); - real_type* valsU = U_->getValues(HOST); + real_type* valsU = U_->getValues(HOST); index_type N = A_->getNumRows(); for (index_type u = 0; u < N; ++u) - idxmap_[u] = -1; + idxmap_[u] = -1; // Factorize (incompletely) - for (index_type i = 1; i < N; ++i) { - for (index_type v = rowsL[i]; v < rowsL[i+1]; ++v) { + for (index_type i = 1; i < N; ++i) + { + for (index_type v = rowsL[i]; v < rowsL[i + 1]; ++v) + { index_type k = colsL[v]; - for (index_type u = rowsU[k]; u < rowsU[k+1]; ++u) { - idxmap_[colsU[u]] = u; + for (index_type u = rowsU[k]; u < rowsU[k + 1]; ++u) + { + idxmap_[colsU[u]] = u; } valsL[v] /= valsU[rowsU[k]]; - for (index_type w = v+1; w < rowsL[i+1]; ++w) { - index_type j = idxmap_[colsL[w]]; + for (index_type w = v + 1; w < rowsL[i + 1]; ++w) + { + index_type j = idxmap_[colsL[w]]; if (j == -1) continue; - valsL[w] -= valsL[v]*valsU[j]; + valsL[w] -= valsL[v] * valsU[j]; } - for (index_type w = rowsU[i]; w < rowsU[i+1]; ++w) { - index_type j = idxmap_[colsU[w]]; + for (index_type w = rowsU[i]; w < rowsU[i + 1]; ++w) + { + index_type j = idxmap_[colsU[w]]; if (j == -1) continue; - valsU[w] -= valsL[v]*valsU[j]; + valsU[w] -= valsL[v] * valsU[j]; } for (index_type u = 0; u < N; ++u) - idxmap_[u] = -1; + idxmap_[u] = -1; } } @@ -235,7 +260,7 @@ namespace ReSolve /** * @brief Triangular solve - * + * * @param[in,out] rhs_vec - right-hand-side vector * @return int - error code */ @@ -254,8 +279,10 @@ namespace ReSolve real_type* valsL = L_->getValues(HOST); // Forward substitution - for (index_type i = 0; i < N; ++i) { - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { + for (index_type i = 0; i < N; ++i) + { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { rhs[i] -= valsL[j] * rhs[colsL[j]]; } } @@ -265,8 +292,10 @@ namespace ReSolve real_type* valsU = U_->getValues(HOST); // Backward 
substitution - for (index_type i = N - 1; i >= 0; --i) { - for (index_type j = rowsU[i] + 1; j < rowsU[i+1]; ++j) { + for (index_type i = N - 1; i >= 0; --i) + { + for (index_type j = rowsU[i] + 1; j < rowsU[i + 1]; ++j) + { rhs[i] -= valsU[j] * rhs[colsU[j]]; } rhs[i] /= valsU[rowsU[i]]; @@ -277,8 +306,8 @@ namespace ReSolve /** * @brief Triangular solve - * - * @param[in] rhs_vec - right-hand-side vector + * + * @param[in] rhs_vec - right-hand-side vector * @param[out] x_vec - solution vector * @return int - status code */ @@ -299,9 +328,11 @@ namespace ReSolve const real_type* valsL = L_->getValues(HOST); // Forward substitution - for (index_type i = 0; i < N; ++i) { + for (index_type i = 0; i < N; ++i) + { x[i] = rhs[i]; - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { x[i] -= valsL[j] * x[colsL[j]]; } } @@ -311,8 +342,10 @@ namespace ReSolve const real_type* valsU = U_->getValues(HOST); // Backward substitution - for (index_type i = N - 1; i >= 0; --i) { - for (index_type j = rowsU[i] + 1; j < rowsU[i+1]; ++j) { + for (index_type i = N - 1; i >= 0; --i) + { + for (index_type j = rowsU[i] + 1; j < rowsU[i + 1]; ++j) + { x[i] -= valsU[j] * x[colsU[j]]; } x[i] /= valsU[rowsU[i]]; @@ -333,11 +366,11 @@ namespace ReSolve /** * @brief Sets approximation to zero on matrix diagonal. - * + * * If the original matrix has structural zeros on the diagonal, the ILU0 * analysis will add diagonal elements and set them to `zero_diagonal_` * value. The default is 1e-6, this function allows user to change that. - * + * * @param z - small value approximating zero * @return int - returns status code */ @@ -349,11 +382,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. 
- * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -361,19 +394,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -381,19 +414,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -401,19 +434,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. 
*/ @@ -421,19 +454,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -441,8 +474,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -458,4 +491,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCpuILU0.hpp b/resolve/LinSolverDirectCpuILU0.hpp index dbde29e20..5c8561bf1 100644 --- a/resolve/LinSolverDirectCpuILU0.hpp +++ b/resolve/LinSolverDirectCpuILU0.hpp @@ -3,7 +3,7 @@ * @author Slaven Peles (peless@ornl.gov) * @brief Contains declaration of a class for incomplete LU factorization on CPU * - * + * */ #pragma once @@ -11,7 +11,7 @@ #include #include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -23,66 +23,67 @@ namespace ReSolve namespace matrix { class Sparse; - } + class Csr; + } // namespace matrix // Forward declaration of CPU workspace class LinAlgWorkspaceCpu; /** * @brief Incomplete LU factorization solver. - * + * * Implements ILU0 factorization from Algorithm 1 in 2023 paper by Suzuki, * Fukaya, and Iwashita with modification where zero diagonal elements in * the matrix are replaced by small values specified in `zero_diagonal_`. 
* Factors L and U are stored in separate CSR matrices. Factor L does not * store ones at the diagonal. - * + * * Methods in this class perform all operations on raw matrix data. - * + * */ - class LinSolverDirectCpuILU0 : public LinSolverDirect + class LinSolverDirectCpuILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* workspace = nullptr); - ~LinSolverDirectCpuILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - int analyze() override; - int factorize() override; - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) - matrix::Sparse* getLFactor() override; - matrix::Sparse* getUFactor() override; + public: + LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* workspace = nullptr); + ~LinSolverDirectCpuILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + int analyze() override; + int factorize() override; + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) + + matrix::Sparse* getLFactor() override; + matrix::Sparse* getUFactor() override; - int setZeroDiagonal(real_type z); + int setZeroDiagonal(real_type z); - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const 
override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; - private: - // MemoryHandler mem_; ///< Device memory manager object - // LinAlgWorkspaceCpu* workspace_{nullptr}; + private: + // MemoryHandler mem_; ///< Device memory manager object + // LinAlgWorkspaceCpu* workspace_{nullptr}; - matrix::Csr* A_{nullptr}; ///< Pointer to the system matrix - real_type* diagU_{nullptr}; ///< Buffer holding diagonal of factor U - index_type* idxmap_{nullptr}; ///< Mapping for matrix column indices - bool owns_factors_{false}; ///< If the class owns L and U factors + matrix::Csr* A_{nullptr}; ///< Pointer to the system matrix + real_type* diagU_{nullptr}; ///< Buffer holding diagonal of factor U + index_type* idxmap_{nullptr}; ///< Mapping for matrix column indices + bool owns_factors_{false}; ///< If the class owns L and U factors - real_type zero_diagonal_{1e-6}; ///< Approximation for zero diagonal + real_type zero_diagonal_{1e-6}; ///< Approximation for zero diagonal }; } // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index f57de9ff9..c05b2295a 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -1,17 +1,18 @@ +#include "LinSolverDirectCuSolverGLU.hpp" + #include // includes memcpy #include -#include +#include #include -#include #include -#include 
"LinSolverDirectCuSolverGLU.hpp" -#include +#include +#include namespace ReSolve { using vector_type = vector::Vector; - using out = io::Logger; + using out = io::Logger; LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace) { @@ -30,23 +31,23 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, + index_type* P, + index_type* Q, vector_type* /** rhs */) { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - //get the handle - handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); - A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - index_type nnz = A_->getNnz(); - //create combined factor - combineFactors(L,U); + // get the handle + handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); + A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); + index_type nnz = A_->getNnz(); + // create combined factor + combineFactors(L, U); - //set up descriptors + // set up descriptors cusparseCreateMatDescr(&descr_M_); cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); @@ -56,46 +57,46 @@ namespace ReSolve cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); - //set up the GLU - status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, + // set up the GLU + status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, n, - nnz, - descr_A_, + nnz, + descr_A_, A_->getRowData(memory::HOST), A_->getColData(memory::HOST), - P, /** base-0 */ - Q, /** base-0 */ - M_->getNnz(), /** nnzM */ - descr_M_, - M_->getRowData(memory::HOST), - M_->getColData(memory::HOST), + P, /** base-0 */ + Q, /** base-0 */ + M_->getNnz(), /** nnzM */ + descr_M_, + M_->getRowData(memory::HOST), + M_->getColData(memory::HOST), info_M_); - error_sum += status_cusolver_; - //NOW 
the buffer + error_sum += status_cusolver_; + // NOW the buffer size_t buffer_size; - status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); + error_sum += status_cusolver_; mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size); - status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; // reset and refactor so factors are ON THE GPU - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, n, /** A is original matrix */ - nnz, - descr_A_, - A_->getValues( memory::DEVICE), + nnz, + descr_A_, + A_->getValues(memory::DEVICE), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), info_M_); - error_sum += status_cusolver_; + error_sum += status_cusolver_; - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -104,52 +105,61 @@ namespace ReSolve void LinSolverDirectCuSolverGLU::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { // L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(memory::HOST); - index_type* Li = L->getRowData(memory::HOST); - index_type* Up = U->getColData(memory::HOST); - index_type* Ui = U->getRowData(memory::HOST); - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); + index_type n = L->getNumRows(); + index_type* Lp = L->getColData(memory::HOST); + index_type* Li = L->getRowData(memory::HOST); + index_type* Up = 
U->getColData(memory::HOST); + index_type* Ui = U->getRowData(memory::HOST); + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); M_->allocateMatrixData(memory::HOST); index_type* mia = M_->getRowData(memory::HOST); index_type* mja = M_->getColData(memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { + index_type row; + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; // BUT dont count diagonal twice, important - if(row != i) { + if (row != i) + { mia[row + 1]++; } } // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { row = Ui[j]; mia[row + 1]++; } } // then organize mia_; mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { + for (index_type i = 1; i < n + 1; i++) + { mia[i] += mia[i - 1]; } std::vector Mshifts(n, 0); - for(index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + for (int j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; - if(row != i) { + if (row != i) + { // place (row, i) where it belongs! 
mja[mia[row] + Mshifts[row]] = i; Mshifts[row]++; } } // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { + row = Ui[j]; mja[mia[row] + Mshifts[row]] = i; Mshifts[row]++; } @@ -159,20 +169,20 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::refactorize() { RESOLVE_RANGE_PUSH(__FUNCTION__); - int error_sum = 0; - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, - A_->getNumRows(), - /** A is original matrix */ - A_->getNnz(), - descr_A_, - A_->getValues( memory::DEVICE), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - info_M_); - error_sum += status_cusolver_; + int error_sum = 0; + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, + A_->getNumRows(), + /** A is original matrix */ + A_->getNnz(), + descr_A_, + A_->getValues(memory::DEVICE), + A_->getRowData(memory::DEVICE), + A_->getColData(memory::DEVICE), + info_M_); + error_sum += status_cusolver_; - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -181,26 +191,26 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::solve(vector_type* rhs, vector_type* x) { RESOLVE_RANGE_PUSH(__FUNCTION__); - status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, - A_->getNumRows(), - /** A is original matrix */ - A_->getNnz(), - descr_A_, - A_->getValues( memory::DEVICE), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - rhs->getData(memory::DEVICE),/** right hand side */ - x->getData(memory::DEVICE), /** left hand side */ - &ite_refine_succ_, - &r_nrm_inf_, - info_M_, - glu_buffer_); + status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, + A_->getNumRows(), + /** A is original matrix */ + A_->getNnz(), + descr_A_, + 
A_->getValues(memory::DEVICE), + A_->getRowData(memory::DEVICE), + A_->getColData(memory::DEVICE), + rhs->getData(memory::DEVICE), /** right hand side */ + x->getData(memory::DEVICE), /** left hand side */ + &ite_refine_succ_, + &r_nrm_inf_, + info_M_, + glu_buffer_); x->setDataUpdated(memory::DEVICE); RESOLVE_RANGE_POP(__FUNCTION__); - return status_cusolver_; + return status_cusolver_; } - int LinSolverDirectCuSolverGLU::solve(vector_type* ) + int LinSolverDirectCuSolverGLU::solve(vector_type*) { out::error() << "Function solve(Vector* x) not implemented in CuSolverGLU!\n" << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; @@ -209,11 +219,11 @@ namespace ReSolve /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -221,19 +231,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -241,19 +251,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). 
* In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -261,19 +271,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -281,19 +291,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. 
*/ @@ -301,8 +311,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -318,4 +328,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index 3ac22194e..485261d5a 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -1,12 +1,11 @@ #pragma once -#include "cusolver_defs.hpp" - #include "Common.hpp" -#include +#include "cusolver_defs.hpp" #include +#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -23,47 +22,47 @@ namespace ReSolve // Forward declaration of ReSolve handlers workspace class LinAlgWorkspaceCUDA; - class LinSolverDirectCuSolverGLU : public LinSolverDirect + class LinSolverDirectCuSolverGLU : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); - ~LinSolverDirectCuSolverGLU(); - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; + public: + LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); + ~LinSolverDirectCuSolverGLU(); + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs = nullptr) override; - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr) override; - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type 
getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; - private: - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors - matrix::Sparse* M_; ///< the matrix that contains added factors - //note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation - cusparseMatDescr_t descr_M_; //this is NOT sparse matrix descriptor - cusparseMatDescr_t descr_A_; //this is NOT sparse matrix descriptor - LinAlgWorkspaceCUDA* workspace_; ///< Workspace access so we can copy cusparse handle - cusolverSpHandle_t handle_cusolversp_; - cusolverStatus_t status_cusolver_; - cusparseStatus_t status_cusparse_; - csrgluInfo_t info_M_; - void* glu_buffer_; - double r_nrm_inf_; ///< Error norm of the solution - int ite_refine_succ_; ///< Stores return value of cusolverSpDgluSolve + private: + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors + matrix::Sparse* M_; ///< the matrix that contains added factors + // note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation + cusparseMatDescr_t descr_M_; // this is NOT sparse matrix descriptor + cusparseMatDescr_t descr_A_; // this is NOT sparse matrix descriptor + LinAlgWorkspaceCUDA* workspace_; ///< Workspace access so we can copy cusparse handle + cusolverSpHandle_t handle_cusolversp_; + cusolverStatus_t 
status_cusolver_; + cusparseStatus_t status_cusparse_; + csrgluInfo_t info_M_; + void* glu_buffer_; + double r_nrm_inf_; ///< Error norm of the solution + int ite_refine_succ_; ///< Stores return value of cusolverSpDgluSolve - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.cpp b/resolve/LinSolverDirectCuSolverRf.cpp index 447be4c52..7d8a82466 100644 --- a/resolve/LinSolverDirectCuSolverRf.cpp +++ b/resolve/LinSolverDirectCuSolverRf.cpp @@ -1,13 +1,15 @@ +#include "LinSolverDirectCuSolverRf.hpp" + #include -#include -#include #include -#include "LinSolverDirectCuSolverRf.hpp" +#include +#include namespace ReSolve { using out = io::Logger; + /** * @brief Placeholder constructor for LinSolverDirectCuSolverRf * @@ -60,23 +62,22 @@ namespace ReSolve int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, + index_type* P, + index_type* Q, vector_type* /* rhs */) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix A has to be in CSR format for cusolverRf input.\n"); - assert(L->getSparseFormat() == U->getSparseFormat() && - "Matrices L and U have to be in the same format for cusolverRf input.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix A has to be in CSR format for cusolverRf input.\n"); + assert(L->getSparseFormat() == U->getSparseFormat() && "Matrices L and U have to be in the same format for cusolverRf input.\n"); int error_sum = 0; - this->A_ = A; - index_type n = A_->getNumRows(); + this->A_ = A; + index_type n = A_->getNumRows(); - //remember - P and Q are generally CPU variables - // factorization data is stored in the handle. - // If function is called again, destroy the old handle to get rid of old data. 
- if (setup_completed_) { + // remember - P and Q are generally CPU variables + // factorization data is stored in the handle. + // If function is called again, destroy the old handle to get rid of old data. + if (setup_completed_) + { cusolverRfDestroy(handle_cusolverrf_); cusolverRfCreate(&handle_cusolverrf_); } @@ -86,37 +87,41 @@ namespace ReSolve matrix::Csr* L_csr = nullptr; matrix::Csr* U_csr = nullptr; - switch (L->getSparseFormat()) { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - // std::cout << "converting L and U factors from CSC to CSR format ...\n"; - L_csc = static_cast(L); - U_csc = static_cast(U); - L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - csc2csr(L_csc, L_csr); - csc2csr(U_csc, U_csr); - L_csr->syncData(memory::DEVICE); - U_csr->syncData(memory::DEVICE); - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = dynamic_cast(L); - U_csr = dynamic_cast(U); - break; - default: - out::error() << "Matrix type for L and U factors not recognized!\n"; - out::error() << "Refactorization not completed.\n"; - return 1; + switch (L->getSparseFormat()) + { + case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: + // std::cout << "converting L and U factors from CSC to CSR format ...\n"; + L_csc = static_cast(L); + U_csc = static_cast(U); + L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + csc2csr(L_csc, L_csr); + csc2csr(U_csc, U_csr); + L_csr->syncData(memory::DEVICE); + U_csr->syncData(memory::DEVICE); + break; + case matrix::Sparse::COMPRESSED_SPARSE_ROW: + L_csr = dynamic_cast(L); + U_csr = dynamic_cast(U); + break; + default: + out::error() << "Matrix type for L and U factors not recognized!\n"; + out::error() << "Refactorization not completed.\n"; + return 1; } - if (d_P_ == nullptr){ + if 
(d_P_ == nullptr) + { mem_.allocateArrayOnDevice(&d_P_, n); } - if (d_Q_ == nullptr){ + if (d_Q_ == nullptr) + { mem_.allocateArrayOnDevice(&d_Q_, n); } - if (d_T_ != nullptr){ + if (d_T_ != nullptr) + { mem_.deleteOnDevice(d_T_); } @@ -125,57 +130,57 @@ namespace ReSolve mem_.copyArrayHostToDevice(d_P_, P, n); mem_.copyArrayHostToDevice(d_Q_, Q, n); - - status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); - error_sum += status_cusolverrf_; - status_cusolverrf_ = cusolverRfSetupDevice(n, + status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); + error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfSetupDevice(n, A_->getNnz(), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), - A_->getValues( memory::DEVICE), + A_->getValues(memory::DEVICE), L_csr->getNnz(), L_csr->getRowData(memory::DEVICE), L_csr->getColData(memory::DEVICE), - L_csr->getValues( memory::DEVICE), + L_csr->getValues(memory::DEVICE), U_csr->getNnz(), U_csr->getRowData(memory::DEVICE), U_csr->getColData(memory::DEVICE), - U_csr->getValues( memory::DEVICE), + U_csr->getValues(memory::DEVICE), d_P_, d_Q_, handle_cusolverrf_); - error_sum += status_cusolverrf_; + error_sum += status_cusolverrf_; mem_.deviceSynchronize(); - status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); - error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); + error_sum += status_cusolverrf_; const cusolverRfFactorization_t fact_alg = - CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 + CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 const cusolverRfTriangularSolve_t solve_alg = - CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error + CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error this->setAlgorithms(fact_alg, solve_alg); setup_completed_ = true; // Remove temporary objects upon setup 
completion - switch (L->getSparseFormat()) { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - delete L_csr; - delete U_csr; - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - default: - break; + switch (L->getSparseFormat()) + { + case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: + delete L_csr; + delete U_csr; + L_csr = nullptr; + U_csr = nullptr; + L_csc = nullptr; + U_csc = nullptr; + break; + case matrix::Sparse::COMPRESSED_SPARSE_ROW: + L_csr = nullptr; + U_csr = nullptr; + L_csc = nullptr; + U_csc = nullptr; + break; + default: + break; } // delete L_csr; // delete U_csr; @@ -192,7 +197,7 @@ namespace ReSolve * @pre The cuSolverRf handle has been created. * @post The factorization and triangular solve algorithms are set. */ - void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, + void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg) { cusolverRfSetAlgs(handle_cusolverrf_, fact_alg, solve_alg); @@ -215,7 +220,7 @@ namespace ReSolve int LinSolverDirectCuSolverRf::refactorize() { int error_sum = 0; - + // Check if matrix A data is valid assert(A_ != nullptr && "Matrix A is null!"); assert(A_->getNumRows() > 0 && "Matrix A must have positive row count!"); @@ -228,19 +233,19 @@ namespace ReSolve // Check solver handle assert(handle_cusolverrf_ != nullptr && "cuSolverRf handle is null!"); - status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), + status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), A_->getNnz(), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), - A_->getValues( memory::DEVICE), + A_->getValues(memory::DEVICE), d_P_, d_Q_, handle_cusolverrf_); - error_sum += status_cusolverrf_; + error_sum += status_cusolverrf_; mem_.deviceSynchronize(); - status_cusolverrf_ = 
cusolverRfRefactor(handle_cusolverrf_); - error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_); + error_sum += status_cusolverrf_; return error_sum; } @@ -257,14 +262,14 @@ namespace ReSolve */ int LinSolverDirectCuSolverRf::solve(vector_type* rhs) { - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - rhs->getData(memory::DEVICE), - A_->getNumRows()); + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, + d_P_, + d_Q_, + 1, + d_T_, + A_->getNumRows(), + rhs->getData(memory::DEVICE), + A_->getNumRows()); return status_cusolverrf_; } @@ -283,16 +288,17 @@ namespace ReSolve { x->copyDataFrom(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); x->setDataUpdated(memory::DEVICE); - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - x->getData(memory::DEVICE), - A_->getNumRows()); + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, + d_P_, + d_Q_, + 1, + d_T_, + A_->getNumRows(), + x->getData(memory::DEVICE), + A_->getNumRows()); return status_cusolverrf_; } + /** * @brief Sets a flag threshold for zero pivots and a boost factor * @@ -307,8 +313,8 @@ namespace ReSolve real_type nboost) { // Zero flagging threshold and boost NEED TO BE DOUBLE! 
- double zero = static_cast(nzero); - double boost = static_cast(nboost); + double zero = static_cast(nzero); + double boost = static_cast(nboost); status_cusolverrf_ = cusolverRfSetNumericProperties(handle_cusolverrf_, zero, boost); @@ -327,16 +333,16 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - zero_pivot_ = atof(value.c_str()); - setNumericalProperties(zero_pivot_, pivot_boost_); - break; - case PIVOT_BOOST: - pivot_boost_ = atof(value.c_str()); - setNumericalProperties(zero_pivot_, pivot_boost_); - break; - default: - std::cout << "Setting parameter failed!\n"; + case ZERO_PIVOT: + zero_pivot_ = atof(value.c_str()); + setNumericalProperties(zero_pivot_, pivot_boost_); + break; + case PIVOT_BOOST: + pivot_boost_ = atof(value.c_str()); + setNumericalProperties(zero_pivot_, pivot_boost_); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -355,8 +361,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -375,8 +381,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -385,12 +391,12 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - return zero_pivot_; - case PIVOT_BOOST: - return pivot_boost_; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case ZERO_PIVOT: + return zero_pivot_; + case PIVOT_BOOST: + return pivot_boost_; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -409,8 +415,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter 
" << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -422,15 +428,15 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - std::cout << zero_pivot_ << "\n"; - break; - case PIVOT_BOOST: - std::cout << pivot_boost_ << "\n"; - break; - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; + case ZERO_PIVOT: + std::cout << zero_pivot_ << "\n"; + break; + case PIVOT_BOOST: + std::cout << pivot_boost_ << "\n"; + break; + default: + out::error() << "Trying to print unknown parameter " << id << "\n"; + return 1; } return 0; } @@ -473,25 +479,28 @@ namespace ReSolve index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); index_type* colPtrCsc = A_csc->getColData(memory::HOST); - real_type* valuesCsc = A_csc->getValues( memory::HOST); + real_type* valuesCsc = A_csc->getValues(memory::HOST); index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); index_type* colIdxCsr = A_csr->getColData(memory::HOST); - real_type* valuesCsr = A_csr->getValues( memory::HOST); + real_type* valuesCsr = A_csr->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= n; ++i) { + for (index_type i = 0; i <= n; ++i) + { rowPtrCsr[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxCsr[i] = 0; valuesCsr[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrCsr[rowIdxCsc[i]]++; } @@ -499,7 +508,7 @@ namespace ReSolve for (index_type row = 0, rowsum = 0; row < n; ++row) { // Store value in row pointer to temp - index_type temp = rowPtrCsr[row]; + index_type temp = rowPtrCsr[row]; // Copy cumulative sum to the row pointer rowPtrCsr[row] = rowsum; @@ -514,15 +523,15 @@ namespace ReSolve // Compute positions of column indices and values in CSR matrix and store them 
there // Overwrites CSR row pointers in the process // adding to them the number of elements in that row - for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; jj++) { - index_type row = rowIdxCsc[jj]; - index_type dest = rowPtrCsr[row]; + index_type row = rowIdxCsc[jj]; + index_type dest = rowPtrCsr[row]; - colIdxCsr[dest] = col; - valuesCsr[dest] = valuesCsc[jj]; + colIdxCsr[dest] = col; + valuesCsr[dest] = valuesCsc[jj]; - rowPtrCsr[row]++; + rowPtrCsr[row]++; } } @@ -531,9 +540,9 @@ namespace ReSolve // for i>=1: new rowPtrCsr[i] = old rowPtrCsr[i-1] and new rowPtrCsr[0]=0 for (index_type row = 0, last = 0; row <= n; row++) { - index_type temp = rowPtrCsr[row]; - rowPtrCsr[row] = last; - last = temp; + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; } // Mark data on the host as updated @@ -542,4 +551,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.hpp b/resolve/LinSolverDirectCuSolverRf.hpp index b20b222a6..bf9951ca4 100644 --- a/resolve/LinSolverDirectCuSolverRf.hpp +++ b/resolve/LinSolverDirectCuSolverRf.hpp @@ -1,10 +1,9 @@ #pragma once -#include "cusolverRf.h" - #include "Common.hpp" -#include +#include "cusolverRf.h" #include +#include namespace ReSolve { @@ -20,7 +19,7 @@ namespace ReSolve class Sparse; class Csr; class Csc; - } + } // namespace matrix // Forward declaration of ReSolve handlers workspace class LinAlgWorkspaceCUDA; @@ -29,49 +28,54 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverDirectCuSolverRf(LinAlgWorkspaceCUDA* workspace = nullptr); - ~LinSolverDirectCuSolverRf(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr) override; - - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* 
rhs) override; // rhs overwritten by solution - - void setAlgorithms(cusolverRfFactorization_t fact_alg, - cusolverRfTriangularSolve_t solve_alg); - int setNumericalProperties(real_type nzero, real_type nboost); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - void initParamList(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - - private: - enum ParamaterIDs {ZERO_PIVOT=0, PIVOT_BOOST}; - real_type zero_pivot_{0.0}; ///< The value below which zero pivot is flagged. - real_type pivot_boost_{0.0}; ///< The value which is substituted for zero pivot. - - cusolverRfHandle_t handle_cusolverrf_; - cusolverStatus_t status_cusolverrf_; - - index_type* d_P_{nullptr}; - index_type* d_Q_{nullptr}; - real_type* d_T_{nullptr}; - bool setup_completed_{false}; - - MemoryHandler mem_; ///< Device memory manager object + public: + LinSolverDirectCuSolverRf(LinAlgWorkspaceCUDA* workspace = nullptr); + ~LinSolverDirectCuSolverRf(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs = nullptr) override; + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // rhs overwritten by solution + + void setAlgorithms(cusolverRfFactorization_t fact_alg, + cusolverRfTriangularSolve_t solve_alg); + int setNumericalProperties(real_type nzero, real_type nboost); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type 
getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + void initParamList(); + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); + + private: + enum ParamaterIDs + { + ZERO_PIVOT = 0, + PIVOT_BOOST + }; + + real_type zero_pivot_{0.0}; ///< The value below which zero pivot is flagged. + real_type pivot_boost_{0.0}; ///< The value which is substituted for zero pivot. + + cusolverRfHandle_t handle_cusolverrf_; + cusolverStatus_t status_cusolverrf_; + + index_type* d_P_{nullptr}; + index_type* d_Q_{nullptr}; + real_type* d_T_{nullptr}; + bool setup_completed_{false}; + + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSparseILU0.cpp b/resolve/LinSolverDirectCuSparseILU0.cpp index c94945ade..3bce8530d 100644 --- a/resolve/LinSolverDirectCuSparseILU0.cpp +++ b/resolve/LinSolverDirectCuSparseILU0.cpp @@ -1,9 +1,11 @@ -#include -#include #include "LinSolverDirectCuSparseILU0.hpp" + #include -namespace ReSolve +#include +#include + +namespace ReSolve { using out = io::Logger; @@ -24,25 +26,25 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - //remember - P and Q are generally CPU variables + // remember - P and Q are generally CPU variables int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); - mem_.allocateArrayOnDevice(&d_ILU_vals_,nnz); - //copy A values to a buffer first + mem_.allocateArrayOnDevice(&d_ILU_vals_, nnz); + // copy A values to a buffer first mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - mem_.allocateArrayOnDevice(&d_aux1_,n); - mem_.allocateArrayOnDevice(&d_aux2_,n); - cudaMemset(d_aux1_, 1, n*sizeof(double)); + 
mem_.allocateArrayOnDevice(&d_aux1_, n); + mem_.allocateArrayOnDevice(&d_aux2_, n); + cudaMemset(d_aux1_, 1, n * sizeof(double)); cusparseCreateDnVec(&vec_X_, n, d_aux1_, CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, n, d_aux2_, CUDA_R_64F); - //set up descriptors + // set up descriptors // Create matrix descriptor for A cusparseCreateMatDescr(&descr_A_); @@ -57,9 +59,9 @@ namespace ReSolve n, n, nnz, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - d_ILU_vals_, //vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, @@ -69,42 +71,41 @@ namespace ReSolve n, n, nnz, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - d_ILU_vals_, //vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - // Create matrix info structure cusparseCreateCsrilu02Info(&info_A_); - int buffer_size_A; + int buffer_size_A; size_t buffer_size_L; size_t buffer_size_U; - status_cusparse_ = cusparseDcsrilu02_bufferSize(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02_bufferSize(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_A); - mem_.allocateBufferOnDevice(&buffer_,(size_t) buffer_size_A); - error_sum += status_cusparse_; + mem_.allocateBufferOnDevice(&buffer_, (size_t) buffer_size_A); + error_sum += status_cusparse_; // Now analysis - status_cusparse_ = cusparseDcsrilu02_analysis(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = 
cusparseDcsrilu02_analysis(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); @@ -113,79 +114,79 @@ namespace ReSolve // and now the actual decomposition // Compute incomplete LU factorization - status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); error_sum += status_cusparse_; - // now take care of LU solve + // now take care of LU solve // now create actual Sparse matrix OBJECTS for L and U cusparseFillMode_t fillmodeL = CUSPARSE_FILL_MODE_LOWER; cusparseFillMode_t fillmodeU = CUSPARSE_FILL_MODE_UPPER; - cusparseDiagType_t diagtypeL = CUSPARSE_DIAG_TYPE_UNIT; - cusparseDiagType_t diagtypeU = CUSPARSE_DIAG_TYPE_NON_UNIT; + cusparseDiagType_t diagtypeL = CUSPARSE_DIAG_TYPE_UNIT; + cusparseDiagType_t diagtypeU = CUSPARSE_DIAG_TYPE_NON_UNIT; - cusparseSpMatSetAttribute(mat_L_, + cusparseSpMatSetAttribute(mat_L_, CUSPARSE_SPMAT_FILL_MODE, - &fillmodeL, - sizeof(fillmodeL)); + &fillmodeL, + sizeof(fillmodeL)); - cusparseSpMatSetAttribute(mat_U_, + cusparseSpMatSetAttribute(mat_U_, CUSPARSE_SPMAT_FILL_MODE, - &fillmodeU, + &fillmodeU, sizeof(fillmodeU)); - cusparseSpMatSetAttribute(mat_L_, + cusparseSpMatSetAttribute(mat_L_, CUSPARSE_SPMAT_DIAG_TYPE, - &diagtypeL, - sizeof(diagtypeL)); + &diagtypeL, + sizeof(diagtypeL)); - cusparseSpMatSetAttribute(mat_U_, + cusparseSpMatSetAttribute(mat_U_, 
CUSPARSE_SPMAT_DIAG_TYPE, - &diagtypeU, - sizeof(diagtypeU)); + &diagtypeU, + sizeof(diagtypeU)); - status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_L_, vec_X_, - vec_Y_, + vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, - descr_spsv_L_, + descr_spsv_L_, &buffer_size_L); - error_sum += status_cusparse_; + error_sum += status_cusparse_; mem_.allocateBufferOnDevice(&buffer_L_, buffer_size_L); - status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_U_, vec_X_, - vec_Y_, + vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, - descr_spsv_U_, + descr_spsv_U_, &buffer_size_U); - error_sum += status_cusparse_; + error_sum += status_cusparse_; mem_.allocateBufferOnDevice(&buffer_U_, buffer_size_U); - status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_L_, vec_X_, vec_Y_, @@ -193,12 +194,11 @@ namespace ReSolve CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_, buffer_L_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; - status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_U_, vec_X_, vec_Y_, @@ -215,24 +215,24 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::reset(matrix::Sparse* A) { - int error_sum = 0; - 
this->A_ = A; - index_type n = A_->getNumRows(); + int error_sum = 0; + this->A_ = A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); - //rerun tri solve analysis - to be updated - error_sum += status_cusparse_; + // rerun tri solve analysis - to be updated + error_sum += status_cusparse_; return error_sum; } @@ -241,34 +241,34 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::solve(vector_type* rhs) { int error_sum = 0; - + cusparseCreateDnVec(&vec_X_, A_->getNumRows(), rhs->getData(ReSolve::memory::DEVICE), CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, A_->getNumRows(), d_aux1_, CUDA_R_64F); - - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_L_, vec_X_, vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_); - error_sum += status_cusparse_; + error_sum += status_cusparse_; - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_U_, vec_Y_, vec_X_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_U_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; + rhs->setDataUpdated(ReSolve::memory::DEVICE); - + cusparseDestroyDnVec(vec_X_); 
cusparseDestroyDnVec(vec_Y_); @@ -278,35 +278,35 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::solve(vector_type* rhs, vector_type* x) { int error_sum = 0; - + cusparseCreateDnVec(&vec_X_, A_->getNumRows(), rhs->getData(ReSolve::memory::DEVICE), CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, A_->getNumRows(), d_aux1_, CUDA_R_64F); - - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_L_, vec_X_, vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_); - error_sum += status_cusparse_; + error_sum += status_cusparse_; cusparseCreateDnVec(&vec_X_, A_->getNumRows(), x->getData(ReSolve::memory::DEVICE), CUDA_R_64F); - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_U_, vec_Y_, vec_X_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_U_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; + x->setDataUpdated(ReSolve::memory::DEVICE); - + cusparseDestroyDnVec(vec_X_); cusparseDestroyDnVec(vec_Y_); @@ -315,11 +315,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -327,19 +327,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. 
- * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -347,19 +347,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -367,19 +367,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -387,19 +387,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). 
* In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -407,8 +407,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -424,4 +424,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSparseILU0.hpp b/resolve/LinSolverDirectCuSparseILU0.hpp index b5817a102..f4ae5ae6b 100644 --- a/resolve/LinSolverDirectCuSparseILU0.hpp +++ b/resolve/LinSolverDirectCuSparseILU0.hpp @@ -1,14 +1,14 @@ #pragma once -#include #include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -22,59 +22,59 @@ namespace ReSolve class Sparse; } - class LinSolverDirectCuSparseILU0 : public LinSolverDirect + class LinSolverDirectCuSparseILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCuSparseILU0(LinAlgWorkspaceCUDA* workspace); - ~LinSolverDirectCuSparseILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const 
override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - cusparseStatus_t status_cusparse_; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceCUDA* workspace_{nullptr}; - - cusparseMatDescr_t descr_A_{nullptr}; - - cusparseSpMatDescr_t mat_L_{nullptr}; - cusparseSpMatDescr_t mat_U_{nullptr}; - - cusparseSpSVDescr_t descr_spsv_L_{nullptr}; - cusparseSpSVDescr_t descr_spsv_U_{nullptr}; - csrilu02Info_t info_A_{nullptr}; - - void* buffer_{nullptr}; - void* buffer_L_{nullptr}; - void* buffer_U_{nullptr}; - - real_type* d_aux1_{nullptr}; - real_type* d_aux2_{nullptr}; - - cusparseDnVecDescr_t vec_X_{nullptr}; - cusparseDnVecDescr_t vec_Y_{nullptr}; - - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. - real_type* d_ILU_vals_{nullptr}; + + public: + LinSolverDirectCuSparseILU0(LinAlgWorkspaceCUDA* workspace); + ~LinSolverDirectCuSparseILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + cusparseStatus_t status_cusparse_; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceCUDA* workspace_{nullptr}; + + 
cusparseMatDescr_t descr_A_{nullptr}; + + cusparseSpMatDescr_t mat_L_{nullptr}; + cusparseSpMatDescr_t mat_U_{nullptr}; + + cusparseSpSVDescr_t descr_spsv_L_{nullptr}; + cusparseSpSVDescr_t descr_spsv_U_{nullptr}; + csrilu02Info_t info_A_{nullptr}; + + void* buffer_{nullptr}; + void* buffer_L_{nullptr}; + void* buffer_U_{nullptr}; + + real_type* d_aux1_{nullptr}; + real_type* d_aux2_{nullptr}; + + cusparseDnVecDescr_t vec_X_{nullptr}; + cusparseDnVecDescr_t vec_Y_{nullptr}; + + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. + real_type* d_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverDirectKLU.cpp b/resolve/LinSolverDirectKLU.cpp index 2e7e68510..5c3526c32 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -1,12 +1,15 @@ +#include "LinSolverDirectKLU.hpp" + #include // includes memcpy -#include + #include #include -#include "LinSolverDirectKLU.hpp" +#include namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirectKLU * @@ -15,27 +18,27 @@ namespace ReSolve LinSolverDirectKLU::LinSolverDirectKLU() { Symbolic_ = nullptr; - Numeric_ = nullptr; + Numeric_ = nullptr; L_ = nullptr; U_ = nullptr; // Populate KLU data structure holding solver parameters klu_defaults(&Common_); - Common_.btf = 0; - Common_.scale = -1; - Common_.ordering = ordering_; - Common_.tol = pivot_threshold_tol_; + Common_.btf = 0; + Common_.scale = -1; + Common_.ordering = ordering_; + Common_.tol = pivot_threshold_tol_; Common_.halt_if_singular = halt_if_singular_; // Register configurable parameters initParamList(); out::summary() << "KLU solver set with parameters:\n" - << "\tbtf = " << Common_.btf << "\n" - << "\tscale = " << Common_.scale << "\n" - << "\tordering = " << Common_.ordering << "\n" - << "\tpivot threshold = " << Common_.tol << "\n" + << "\tbtf = " << Common_.btf << "\n" + << "\tscale = " << Common_.scale << "\n" + << 
"\tordering = " << Common_.ordering << "\n" + << "\tpivot threshold = " << Common_.tol << "\n" << "\thalt if singular = " << Common_.halt_if_singular << "\n"; } @@ -51,11 +54,12 @@ namespace ReSolve */ LinSolverDirectKLU::~LinSolverDirectKLU() { - if (factors_extracted_) { + if (factors_extracted_) + { delete L_; delete U_; - delete [] P_; - delete [] Q_; + delete[] P_; + delete[] Q_; L_ = nullptr; U_ = nullptr; P_ = nullptr; @@ -78,9 +82,9 @@ namespace ReSolve int LinSolverDirectKLU::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { this->A_ = A; return 0; @@ -94,32 +98,37 @@ namespace ReSolve int LinSolverDirectKLU::analyze() { // in case we called this function AGAIN - if (Symbolic_ != nullptr) { + if (Symbolic_ != nullptr) + { klu_free_symbolic(&Symbolic_, &Common_); } - Symbolic_ = klu_analyze(A_->getNumRows(), + Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData(memory::HOST), A_->getColData(memory::HOST), &Common_); factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (Symbolic_ == nullptr) { + if (Symbolic_ == nullptr) + { out::error() << "Symbolic_ factorization failed with Common_.status = " << Common_.status << "\n"; return 1; } return 0; } + /** * @brief Factorize the matrix A. 
* @@ -127,7 +136,8 @@ namespace ReSolve */ int LinSolverDirectKLU::factorize() { - if (Numeric_ != nullptr) { + if (Numeric_ != nullptr) + { klu_free_numeric(&Numeric_, &Common_); } @@ -139,17 +149,20 @@ namespace ReSolve factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (Numeric_ == nullptr) { + if (Numeric_ == nullptr) + { return 1; } return 0; @@ -160,7 +173,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int LinSolverDirectKLU::refactorize() + int LinSolverDirectKLU::refactorize() { int kluStatus = klu_refactor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), @@ -171,18 +184,21 @@ namespace ReSolve factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (!kluStatus){ - //display error + if (!kluStatus) + { + // display error return 1; } return 0; @@ -199,13 +215,14 @@ namespace ReSolve int LinSolverDirectKLU::solve(vector_type* rhs, vector_type* x) { - //copy the vector + // copy the vector x->copyDataFrom(rhs->getData(memory::HOST), memory::HOST, memory::HOST); x->setDataUpdated(memory::HOST); int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData(memory::HOST), &Common_); - if (!kluStatus){ + if (!kluStatus) + { return 1; } return 0; @@ -214,7 +231,7 @@ namespace ReSolve /** * @brief Generic solver with matrix A with unspecified rhs (not implemented). 
*/ - int LinSolverDirectKLU::solve(vector_type* ) + int LinSolverDirectKLU::solve(vector_type*) { out::error() << "Function solve(Vector* x) not implemented in LinSolverDirectKLU!\n" << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; @@ -228,7 +245,8 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectKLU::getLFactor() { - if (!factors_extracted_) { + if (!factors_extracted_) + { const int nnzL = Numeric_->lnz; const int nnzU = Numeric_->unz; @@ -241,10 +259,10 @@ namespace ReSolve Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), + L_->getValues(memory::HOST), U_->getColData(memory::HOST), U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), + U_->getValues(memory::HOST), nullptr, nullptr, nullptr, @@ -261,6 +279,7 @@ namespace ReSolve } return L_; } + /** * @brief Get the upper triangular factor U. * @@ -268,7 +287,8 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectKLU::getUFactor() { - if (!factors_extracted_) { + if (!factors_extracted_) + { const int nnzL = Numeric_->lnz; const int nnzU = Numeric_->unz; @@ -280,10 +300,10 @@ namespace ReSolve Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), + L_->getValues(memory::HOST), U_->getColData(memory::HOST), U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), + U_->getValues(memory::HOST), nullptr, nullptr, nullptr, @@ -301,6 +321,7 @@ namespace ReSolve } return U_; } + /** * @brief Get the permutation vector P. 
* @@ -308,12 +329,15 @@ namespace ReSolve */ index_type* LinSolverDirectKLU::getPOrdering() { - if (Numeric_ != nullptr) { - P_ = new index_type[A_->getNumRows()]; + if (Numeric_ != nullptr) + { + P_ = new index_type[A_->getNumRows()]; size_t nrows = static_cast(A_->getNumRows()); std::memcpy(P_, Numeric_->Pnum, nrows * sizeof(index_type)); return P_; - } else { + } + else + { return nullptr; } } @@ -325,12 +349,15 @@ namespace ReSolve */ index_type* LinSolverDirectKLU::getQOrdering() { - if (Numeric_ != nullptr) { - Q_ = new index_type[A_->getNumRows()]; + if (Numeric_ != nullptr) + { + Q_ = new index_type[A_->getNumRows()]; size_t nrows = static_cast(A_->getNumRows()); std::memcpy(Q_, Symbolic_->Q, nrows * sizeof(index_type)); return Q_; - } else { + } + else + { return nullptr; } } @@ -345,7 +372,7 @@ namespace ReSolve void LinSolverDirectKLU::setPivotThreshold(real_type tol) { pivot_threshold_tol_ = tol; - Common_.tol = tol; + Common_.tol = tol; } /** @@ -357,7 +384,7 @@ namespace ReSolve */ void LinSolverDirectKLU::setOrdering(int ordering) { - ordering_ = ordering; + ordering_ = ordering; Common_.ordering = ordering; } @@ -371,7 +398,7 @@ namespace ReSolve void LinSolverDirectKLU::setHaltIfSingular(bool isHalt) { - halt_if_singular_ = isHalt; + halt_if_singular_ = isHalt; Common_.halt_if_singular = isHalt; } @@ -400,17 +427,17 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - setPivotThreshold(atof(value.c_str())); - break; - case ORDERING: - setOrdering(atoi(value.c_str())); - break; - case HALT_IF_SINGULAR: - setHaltIfSingular(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case PIVOT_TOL: + setPivotThreshold(atof(value.c_str())); + break; + case ORDERING: + setOrdering(atoi(value.c_str())); + break; + case HALT_IF_SINGULAR: + setHaltIfSingular(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -429,8 +456,8 @@ namespace ReSolve { switch (getParamId(id)) 
{ - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -448,10 +475,10 @@ namespace ReSolve { switch (getParamId(id)) { - case ORDERING: - return ordering_; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case ORDERING: + return ordering_; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -470,10 +497,10 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - return pivot_threshold_tol_; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case PIVOT_TOL: + return pivot_threshold_tol_; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -491,10 +518,10 @@ namespace ReSolve { switch (getParamId(id)) { - case HALT_IF_SINGULAR: - return halt_if_singular_; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case HALT_IF_SINGULAR: + return halt_if_singular_; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -510,18 +537,18 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - std::cout << pivot_threshold_tol_ << "\n"; - break; - case ORDERING: - std::cout << ordering_ << "\n"; - break; - case HALT_IF_SINGULAR: - std::cout << halt_if_singular_ << "\n"; - break; - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; + case PIVOT_TOL: + std::cout << pivot_threshold_tol_ << "\n"; + break; + case ORDERING: + std::cout << ordering_ << "\n"; + break; + case HALT_IF_SINGULAR: + std::cout << halt_if_singular_ << "\n"; + break; + default: + out::error() << "Trying to print unknown parameter " << id << "\n"; + return 1; } return 0; } @@ -529,6 +556,7 @@ namespace ReSolve // // 
Private methods // + /** * @brief Initialize the parameter list for KLU solver. * diff --git a/resolve/LinSolverDirectKLU.hpp b/resolve/LinSolverDirectKLU.hpp index c371f9991..e134bddc0 100644 --- a/resolve/LinSolverDirectKLU.hpp +++ b/resolve/LinSolverDirectKLU.hpp @@ -1,8 +1,7 @@ #pragma once -#include "klu.h" - #include "Common.hpp" +#include "klu.h" #include namespace ReSolve @@ -23,81 +22,86 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverDirectKLU(); - ~LinSolverDirectKLU(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - - int analyze() override; //the same as symbolic factorization - int factorize() override; - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; - - matrix::Sparse* getLFactor() override; - matrix::Sparse* getUFactor() override; - index_type* getPOrdering() override; - index_type* getQOrdering() override; - - virtual void setPivotThreshold(real_type tol); - virtual void setOrdering(int ordering); - virtual void setHaltIfSingular(bool isHalt); - - virtual real_type getMatrixConditionNumber(); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {PIVOT_TOL=0, ORDERING, HALT_IF_SINGULAR}; - - /** - * @brief Ordering type (during the analysis) - * - * Available values are 0 = AMD, 1 = COLAMD, 2 = user provided P, Q. - * - * Default is COLAMD. - */ - int ordering_{1}; - - /** - * @brief Partial pivoing tolerance. 
- * - * If the diagonal entry has a magnitude greater than or equal to tol - * times the largest magnitude of entries in the pivot column, then the - * diagonal entry is chosen. - */ - real_type pivot_threshold_tol_{0.1}; - - /** - * @brief Halt if matrix is singular. - * - * If false: keep going. Return a Numeric object with a zero U(k,k). - * A divide-by-zero may occur when computing L(:,k). The Numeric object - * can be passed to klu_solve (a divide-by-zero will occur). It can - * also be safely passed to refactorization methods. - * - * If true: stop quickly. klu_factor will free the partially-constructed - * Numeric object. klu_refactor will not free it, but will leave the - * numerical values only partially defined. - */ - bool halt_if_singular_{false}; - - private: - void initParamList(); - bool factors_extracted_{false}; - klu_common Common_; //settings - klu_symbolic* Symbolic_{nullptr}; - klu_numeric* Numeric_{nullptr}; + public: + LinSolverDirectKLU(); + ~LinSolverDirectKLU(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + + int analyze() override; // the same as symbolic factorization + int factorize() override; + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; + + matrix::Sparse* getLFactor() override; + matrix::Sparse* getUFactor() override; + index_type* getPOrdering() override; + index_type* getQOrdering() override; + + virtual void setPivotThreshold(real_type tol); + virtual void setOrdering(int ordering); + virtual void setHaltIfSingular(bool isHalt); + + virtual real_type getMatrixConditionNumber(); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type 
getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + PIVOT_TOL = 0, + ORDERING, + HALT_IF_SINGULAR + }; + + /** + * @brief Ordering type (during the analysis) + * + * Available values are 0 = AMD, 1 = COLAMD, 2 = user provided P, Q. + * + * Default is COLAMD. + */ + int ordering_{1}; + + /** + * @brief Partial pivoting tolerance. + * + * If the diagonal entry has a magnitude greater than or equal to tol + * times the largest magnitude of entries in the pivot column, then the + * diagonal entry is chosen. + */ + real_type pivot_threshold_tol_{0.1}; + + /** + * @brief Halt if matrix is singular. + * + * If false: keep going. Return a Numeric object with a zero U(k,k). + * A divide-by-zero may occur when computing L(:,k). The Numeric object + * can be passed to klu_solve (a divide-by-zero will occur). It can + * also be safely passed to refactorization methods. + * + * If true: stop quickly. klu_factor will free the partially-constructed + * Numeric object. klu_refactor will not free it, but will leave the + * numerical values only partially defined. 
+ */ + bool halt_if_singular_{false}; + + private: + void initParamList(); + bool factors_extracted_{false}; + klu_common Common_; // settings + klu_symbolic* Symbolic_{nullptr}; + klu_numeric* Numeric_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectLUSOL.cpp b/resolve/LinSolverDirectLUSOL.cpp index cddb777ce..2920d6364 100644 --- a/resolve/LinSolverDirectLUSOL.cpp +++ b/resolve/LinSolverDirectLUSOL.cpp @@ -1,11 +1,11 @@ +#include "LinSolverDirectLUSOL.hpp" + #include #include #include #include -#include "LinSolverDirectLUSOL.hpp" #include "lusol/lusol.hpp" - #include #include #include @@ -21,20 +21,21 @@ namespace ReSolve luparm_[0] = 6; // Set LUSOL output print level - switch (out::verbosity()) { - case io::Logger::NONE: - luparm_[1] = -1; - break; - case io::Logger::ERRORS: - case io::Logger::WARNINGS: - luparm_[1] = 0; - break; - case io::Logger::SUMMARY: - luparm_[1] = 10; - break; - case io::Logger::EVERYTHING: - luparm_[1] = 50; - break; + switch (out::verbosity()) + { + case io::Logger::NONE: + luparm_[1] = -1; + break; + case io::Logger::ERRORS: + case io::Logger::WARNINGS: + luparm_[1] = 0; + break; + case io::Logger::SUMMARY: + luparm_[1] = 10; + break; + case io::Logger::EVERYTHING: + luparm_[1] = 50; + break; } // maximum number of columns searched allowed in a Markowitz-type @@ -94,11 +95,11 @@ namespace ReSolve int LinSolverDirectLUSOL::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { - A_ = A; + A_ = A; is_factorized_ = false; delete L_; delete U_; @@ -119,24 +120,26 @@ namespace ReSolve int LinSolverDirectLUSOL::analyze() { // Brute force solution: If the solver workspace is already allocated, nuke it! 
- if (is_solver_data_allocated_) { + if (is_solver_data_allocated_) + { freeSolverData(); is_solver_data_allocated_ = false; } nelem_ = A_->getNnz(); - m_ = A_->getNumRows(); - n_ = A_->getNumColumns(); + m_ = A_->getNumRows(); + n_ = A_->getNumColumns(); allocateSolverData(); is_solver_data_allocated_ = true; - real_type* a_in = A_->getValues(memory::HOST); + real_type* a_in = A_->getValues(memory::HOST); index_type* indc_in = A_->getRowData(memory::HOST); index_type* indr_in = A_->getColData(memory::HOST); - for (index_type i = 0; i < nelem_; i++) { - a_[i] = a_in[i]; + for (index_type i = 0; i < nelem_; i++) + { + a_[i] = a_in[i]; indc_[i] = indc_in[i] + 1; indr_[i] = indr_in[i] + 1; } @@ -146,7 +149,8 @@ namespace ReSolve int LinSolverDirectLUSOL::factorize() { - if (!is_solver_data_allocated_) { + if (!is_solver_data_allocated_) + { out::warning() << "LinSolverDirect::factorize() called on " << "LinSolverDirectLUSOL without allocating the " << "workspace first!\n"; @@ -154,7 +158,8 @@ namespace ReSolve // it isn't possible for this to error in any recoverable way // but we'll check anyway int inform = analyze(); - if (inform < 0) { + if (inform < 0) + { return inform; } } @@ -199,22 +204,25 @@ namespace ReSolve int LinSolverDirectLUSOL::solve(vector_type* rhs, vector_type* x) { - if (!is_factorized_) { + if (!is_factorized_) + { out::warning() << "LinSolverDirect::solve(vector_type*, vector_type*) " << "called on LinSolverDirectLUSOL without factorizing " << "first!\n"; int inform = factorize(); - if (inform < 0) { + if (inform < 0) + { return inform; } } - if (m_ == 0 || rhs->getSize() != m_ || x->getSize() != n_) { + if (m_ == 0 || rhs->getSize() != m_ || x->getSize() != n_) + { return -1; } - index_type mode = 5; + index_type mode = 5; index_type inform = 0; lu6sol(&mode, @@ -254,11 +262,13 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectLUSOL::getLFactor() { - if (!is_factorized_) { + if (!is_factorized_) + { return nullptr; } - if (L_ != nullptr) { 
+ if (L_ != nullptr) + { // because of the way we've implemented setup, we can just return the // existing pointer in L_ as this means we've already extracted L // @@ -267,48 +277,52 @@ namespace ReSolve } index_type diagonal_bound = std::min({m_, n_}); - index_type current_nnz = luparm_[22]; + index_type current_nnz = luparm_[22]; L_ = static_cast(new matrix::Csc(n_, m_, current_nnz + diagonal_bound, false, true)); L_->allocateMatrixData(memory::HOST); index_type* columns = L_->getColData(memory::HOST); - index_type* rows = L_->getRowData(memory::HOST); - real_type* values = L_->getValues(memory::HOST); + index_type* rows = L_->getRowData(memory::HOST); + real_type* values = L_->getValues(memory::HOST); // build an inverse permutation array for p // NOTE: this is not one-indexed like the original is std::unique_ptr pt = std::unique_ptr(new index_type[m_]); - for (index_type i = 0; i < m_; i++) { + for (index_type i = 0; i < m_; i++) + { size_t j = static_cast(p_[i] - 1); - pt[j] = i; + pt[j] = i; } // preprocessing since columns are stored unordered within lusol's workspace - columns[0] = 0; - index_type offset = lena_ - 1; + columns[0] = 0; + index_type offset = lena_ - 1; index_type initial_m = luparm_[19]; - for (index_type i = 0; i < initial_m; i++) { - index_type column_nnz = lenc_[i]; - index_type column_nnz_end = offset - column_nnz; - size_t j = static_cast(indr_[column_nnz_end + 1] - 1); + for (index_type i = 0; i < initial_m; i++) + { + index_type column_nnz = lenc_[i]; + index_type column_nnz_end = offset - column_nnz; + size_t j = static_cast(indr_[column_nnz_end + 1] - 1); index_type corresponding_column = pt[j]; columns[corresponding_column + 1] = column_nnz; - offset = column_nnz_end; + offset = column_nnz_end; } - for (index_type column = 0; column < m_; column++) { + for (index_type column = 0; column < m_; column++) + { columns[column + 1] += columns[column]; } // handle rectangular l factors correctly - for (index_type column = 0; column < 
diagonal_bound; column++) { - columns[column + 1] += column + 1; - rows[columns[column + 1] - 1] = column; - values[columns[column + 1] - 1] = 1.0; + for (index_type column = 0; column < diagonal_bound; column++) + { + columns[column + 1] += column + 1; + rows[columns[column + 1] - 1] = column; + values[columns[column + 1] - 1] = 1.0; } // fill the destination arrays. iterates over the stored columns, depermuting the @@ -316,14 +330,16 @@ namespace ReSolve // insertion sort (where L is the L factor as stored in LUSOL's workspace) offset = lena_ - 1; - for (index_type i = 0; i < initial_m; i++) { - size_t j = static_cast(indr_[offset - lenc_[i] + 1] - 1); + for (index_type i = 0; i < initial_m; i++) + { + size_t j = static_cast(indr_[offset - lenc_[i] + 1] - 1); index_type corresponding_column = pt[j]; for (index_type destination_offset = columns[corresponding_column]; destination_offset < columns[corresponding_column + 1] - 1; - destination_offset++) { - size_t k = static_cast(indc_[offset] - 1); + destination_offset++) + { + size_t k = static_cast(indc_[offset] - 1); index_type row = pt[k]; // closest position to the target row @@ -334,19 +350,21 @@ namespace ReSolve index_type insertion_offset = static_cast(closest_position - rows); // LUSOL is not supposed to create duplicates. Report error if it does. 
- if (rows[insertion_offset] == row && closest_position != &rows[destination_offset]) { + if (rows[insertion_offset] == row && closest_position != &rows[destination_offset]) + { out::error() << "duplicate element found during LUSOL L factor extraction\n"; return nullptr; } for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; - swap_offset--) { + swap_offset--) + { std::swap(rows[swap_offset], rows[swap_offset - 1]); std::swap(values[swap_offset], values[swap_offset - 1]); } - rows[insertion_offset] = row; + rows[insertion_offset] = row; values[insertion_offset] = -a_[offset]; offset--; @@ -364,52 +382,60 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectLUSOL::getUFactor() { - if (!is_factorized_) { + if (!is_factorized_) + { return nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { // likewise return U_; } - index_type current_nnz = luparm_[23]; + index_type current_nnz = luparm_[23]; index_type n_singularities = luparm_[10]; + U_ = static_cast(new matrix::Csr(n_, m_, current_nnz - n_singularities, false, true)); U_->allocateMatrixData(memory::HOST); - index_type* rows = U_->getRowData(memory::HOST); + index_type* rows = U_->getRowData(memory::HOST); index_type* columns = U_->getColData(memory::HOST); - real_type* values = U_->getValues(memory::HOST); + real_type* values = U_->getValues(memory::HOST); // build an inverse permutation array for q // NOTE: this is not one-indexed like the original is std::unique_ptr qt = std::unique_ptr(new index_type[n_]); - for (index_type i = 0; i < n_; i++) { + for (index_type i = 0; i < n_; i++) + { size_t j = static_cast(q_[i] - 1); - qt[j] = i; + qt[j] = i; } // preprocessing since rows technically aren't ordered either index_type stored_rows = luparm_[15]; - for (index_type stored_row = 0; stored_row < stored_rows; stored_row++) { + for (index_type stored_row = 0; stored_row < stored_rows; stored_row++) + { index_type corresponding_row = p_[stored_row] - 1; - rows[stored_row + 1] = 
lenr_[corresponding_row]; + rows[stored_row + 1] = lenr_[corresponding_row]; } - for (index_type row = 0; row < n_; row++) { + for (index_type row = 0; row < n_; row++) + { rows[row + 1] += rows[row]; } // fill the destination arrays - for (index_type row = 0; row < n_; row++) { + for (index_type row = 0; row < n_; row++) + { index_type offset = locr_[p_[row] - 1] - 1; - for (index_type destination_offset = rows[row]; destination_offset < rows[row + 1]; destination_offset++) { - size_t j = static_cast(indr_[offset] - 1); + for (index_type destination_offset = rows[row]; destination_offset < rows[row + 1]; destination_offset++) + { + size_t j = static_cast(indr_[offset] - 1); index_type column = qt[j]; // closest position to the target column @@ -420,18 +446,20 @@ namespace ReSolve index_type insertion_offset = static_cast(closest_position - columns); // LUSOL is not supposed to create duplicates. Report error if it does. - if (columns[insertion_offset] == column && closest_position != &columns[destination_offset]) { + if (columns[insertion_offset] == column && closest_position != &columns[destination_offset]) + { out::error() << "duplicate element found during LUSOL U factor extraction\n"; return nullptr; } - for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; swap_offset--) { + for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; swap_offset--) + { std::swap(columns[swap_offset], columns[swap_offset - 1]); std::swap(values[swap_offset], values[swap_offset - 1]); } columns[insertion_offset] = column; - values[insertion_offset] = a_[offset]; + values[insertion_offset] = a_[offset]; offset++; } @@ -442,11 +470,13 @@ namespace ReSolve index_type* LinSolverDirectLUSOL::getPOrdering() { - if (P_ == nullptr) { + if (P_ == nullptr) + { P_ = new index_type[m_]; } - for (index_type i = 0; i < m_; i++) { + for (index_type i = 0; i < m_; i++) + { P_[i] = p_[i] - 1; } @@ -455,11 +485,13 @@ namespace ReSolve index_type* 
LinSolverDirectLUSOL::getQOrdering() { - if (Q_ == nullptr) { + if (Q_ == nullptr) + { Q_ = new index_type[n_]; } - for (index_type i = 0; i < n_; i++) { + for (index_type i = 0; i < n_; i++) + { Q_[i] = q_[i] - 1; } @@ -468,11 +500,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -480,19 +512,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -500,19 +532,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. 
*/ @@ -520,19 +552,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -540,19 +572,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -560,8 +592,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -588,7 +620,7 @@ namespace ReSolve // lena_ in resolve/lusol/lusol.f90 file. 
lena_ = std::max({20 * nelem_, 10 * m_, 10 * n_, 10000}); - a_ = new real_type[lena_]; + a_ = new real_type[lena_]; indc_ = new index_type[lena_]; indr_ = new index_type[lena_]; mem_.setZeroArrayOnHost(a_, lena_); @@ -649,22 +681,22 @@ namespace ReSolve delete[] w_; delete[] P_; delete[] Q_; - a_ = nullptr; - indc_ = nullptr; - indr_ = nullptr; - p_ = nullptr; - q_ = nullptr; - lenc_ = nullptr; - lenr_ = nullptr; - locc_ = nullptr; - locr_ = nullptr; + a_ = nullptr; + indc_ = nullptr; + indr_ = nullptr; + p_ = nullptr; + q_ = nullptr; + lenc_ = nullptr; + lenr_ = nullptr; + locc_ = nullptr; + locr_ = nullptr; iploc_ = nullptr; iqloc_ = nullptr; ipinv_ = nullptr; iqinv_ = nullptr; - w_ = nullptr; - P_ = nullptr; - Q_ = nullptr; + w_ = nullptr; + P_ = nullptr; + Q_ = nullptr; return 0; } diff --git a/resolve/LinSolverDirectLUSOL.hpp b/resolve/LinSolverDirectLUSOL.hpp index 9de689276..619ed4f9d 100644 --- a/resolve/LinSolverDirectLUSOL.hpp +++ b/resolve/LinSolverDirectLUSOL.hpp @@ -21,150 +21,150 @@ namespace ReSolve /** * @brief Wrapper for LUSOL solver. - * + * * LUSOL Fortran code is in file `lusol.f90`. 
- * + * */ class LinSolverDirectLUSOL : public LinSolverDirect { - using vector_type = vector::Vector; + using vector_type = vector::Vector; - public: - LinSolverDirectLUSOL(); - ~LinSolverDirectLUSOL(); + public: + LinSolverDirectLUSOL(); + ~LinSolverDirectLUSOL(); - /// @brief Setup function of the linear solver - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; + /// @brief Setup function of the linear solver + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; - /// @brief Analysis function of LUSOL - int analyze() override; + /// @brief Analysis function of LUSOL + int analyze() override; - int factorize() override; - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; + int factorize() override; + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; - /// @brief Returns the L factor of the solution in CSC format - matrix::Sparse* getLFactor() override; + /// @brief Returns the L factor of the solution in CSC format + matrix::Sparse* getLFactor() override; - /// @brief Returns the U factor of the solution in CSR format - matrix::Sparse* getUFactor() override; + /// @brief Returns the U factor of the solution in CSR format + matrix::Sparse* getUFactor() override; - index_type* getPOrdering() override; - index_type* getQOrdering() override; + index_type* getPOrdering() override; + index_type* getQOrdering() override; - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const 
std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - int allocateSolverData(); - int freeSolverData(); - - bool is_solver_data_allocated_{false}; - - MemoryHandler mem_; - - /// @brief Indicates if we have factorized the matrix yet - bool is_factorized_ = false; - - /// @brief Storage used for the matrices - /// - /// Primary workspace used by LUSOL. Used to hold the nonzeros of matrices - /// passed along API boundaries and as a general scratch region - real_type* a_ = nullptr; - - /// @brief Row data of matrices passed along API boundaries, in addition to - /// functioning as additional workspace storage for LUSOL - index_type* indc_ = nullptr; - - /// @brief Column data of matrices passed along API boundaries, in addition to - /// functioning as additional workspace storage for LUSOL - index_type* indr_ = nullptr; - - /// @brief The number of nonzero elements within the input matrix, A - index_type nelem_ = 0; - - /// @brief The permutation vector P, stored in the way LUSOL expects it to be (1-indexed) - index_type* p_ = nullptr; - - /// @brief The permutation vector Q, stored in the way LUSOL expects it to be (1-indexed) - index_type* q_ = nullptr; - - /// @brief The length of the dynamically-allocated arrays held within `a_`, - /// `indc_`, and `indr_` - /// - /// This should be much greater than the number of nonzeroes in the input - /// matrix A, as stated in LUSOL's source code. 
- /// - /// Note that this is not an upper bound on the required space; the size of this - /// buffer may be insufficient, in which case a call to a LUSOL subroutine - /// utilizing it will return with inform set to 7, and the intended behavior of - /// the callee is that they should resize `a_`, `indc_`, and `indr_` to at least - /// the value specified in `luparm_[12]` - index_type lena_ = 0; - - /// @brief The number of rows in the input matrix, A - index_type m_ = 0; - - /// @brief The number of columns in the input matrix, A - index_type n_ = 0; - - /// @brief Index-typed parameters passed along the API boundary - index_type luparm_[30] = {0}; - - /// @brief Real-typed parameters passed along the API boundary - real_type parmlu_[30] = {0}; - - /// @brief Number of entries within nontrivial columns of L, stored in pivot order - index_type* lenc_ = nullptr; - - /// @brief Number of entries in each row of U, stored in original order - index_type* lenr_ = nullptr; - - /// @brief Appears to be internal storage for LUSOL, used by the LU update routines - index_type* locc_ = nullptr; - - /// @brief Points to the beginning of rows of U within a - index_type* locr_ = nullptr; - - // TODO: it would be nice to have more information about these "undefined" (as - // said within the source code documentation of lu1fac) parameters - // - // there is some amount of documentation in the "notes on array names" - // section, but given they're only really storage parameters and aren't - // useful post-factorization, we'll leave it at "undefined" for now - - /// @brief Undefined value - index_type* iploc_ = nullptr; - - /// @brief Undefined value - index_type* iqloc_ = nullptr; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const 
std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + int allocateSolverData(); + int freeSolverData(); + + bool is_solver_data_allocated_{false}; + + MemoryHandler mem_; + + /// @brief Indicates if we have factorized the matrix yet + bool is_factorized_ = false; + + /// @brief Storage used for the matrices + /// + /// Primary workspace used by LUSOL. Used to hold the nonzeros of matrices + /// passed along API boundaries and as a general scratch region + real_type* a_ = nullptr; + + /// @brief Row data of matrices passed along API boundaries, in addition to + /// functioning as additional workspace storage for LUSOL + index_type* indc_ = nullptr; + + /// @brief Column data of matrices passed along API boundaries, in addition to + /// functioning as additional workspace storage for LUSOL + index_type* indr_ = nullptr; + + /// @brief The number of nonzero elements within the input matrix, A + index_type nelem_ = 0; + + /// @brief The permutation vector P, stored in the way LUSOL expects it to be (1-indexed) + index_type* p_ = nullptr; + + /// @brief The permutation vector Q, stored in the way LUSOL expects it to be (1-indexed) + index_type* q_ = nullptr; + + /// @brief The length of the dynamically-allocated arrays held within `a_`, + /// `indc_`, and `indr_` + /// + /// This should be much greater than the number of nonzeroes in the input + /// matrix A, as stated in LUSOL's source code. 
+ /// + /// Note that this is not an upper bound on the required space; the size of this + /// buffer may be insufficient, in which case a call to a LUSOL subroutine + /// utilizing it will return with inform set to 7, and the intended behavior of + /// the callee is that they should resize `a_`, `indc_`, and `indr_` to at least + /// the value specified in `luparm_[12]` + index_type lena_ = 0; + + /// @brief The number of rows in the input matrix, A + index_type m_ = 0; + + /// @brief The number of columns in the input matrix, A + index_type n_ = 0; + + /// @brief Index-typed parameters passed along the API boundary + index_type luparm_[30] = {0}; + + /// @brief Real-typed parameters passed along the API boundary + real_type parmlu_[30] = {0}; + + /// @brief Number of entries within nontrivial columns of L, stored in pivot order + index_type* lenc_ = nullptr; + + /// @brief Number of entries in each row of U, stored in original order + index_type* lenr_ = nullptr; + + /// @brief Appears to be internal storage for LUSOL, used by the LU update routines + index_type* locc_ = nullptr; + + /// @brief Points to the beginning of rows of U within a + index_type* locr_ = nullptr; + + // TODO: it would be nice to have more information about these "undefined" (as + // said within the source code documentation of lu1fac) parameters + // + // there is some amount of documentation in the "notes on array names" + // section, but given they're only really storage parameters and aren't + // useful post-factorization, we'll leave it at "undefined" for now + + /// @brief Undefined value + index_type* iploc_ = nullptr; + + /// @brief Undefined value + index_type* iqloc_ = nullptr; - /// @brief Undefined value - index_type* ipinv_ = nullptr; + /// @brief Undefined value + index_type* ipinv_ = nullptr; - /// @brief Undefined value - index_type* iqinv_ = nullptr; - - /// @brief Indicates singularity during LU factorization, otherwise contains either - /// the solution or target for 
solving a linear system - /// - /// Generally speaking, `w_[j] == +max(jth column of U)`, but if the - /// `j`th column is a singularity, `w_[j] == -max(jth column of U)`. Hence, - /// `w_[j] <= 0` implies that the column `j` of A is likely dependent on the - /// other columns of A. - /// - /// When solving a linear system `A*w_ = v_`, `w_` contains the solution. It is not - /// important what `w_` contains prior to this. - real_type* w_ = nullptr; + /// @brief Undefined value + index_type* iqinv_ = nullptr; + + /// @brief Indicates singularity during LU factorization, otherwise contains either + /// the solution or target for solving a linear system + /// + /// Generally speaking, `w_[j] == +max(jth column of U)`, but if the + /// `j`th column is a singularity, `w_[j] == -max(jth column of U)`. Hence, + /// `w_[j] <= 0` implies that the column `j` of A is likely dependent on the + /// other columns of A. + /// + /// When solving a linear system `A*w_ = v_`, `w_` contains the solution. It is not + /// important what `w_` contains prior to this. 
+ real_type* w_ = nullptr; }; } // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index a57b8c1dc..cdf7e4693 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -1,12 +1,15 @@ -#include -#include #include "LinSolverDirectRocSolverRf.hpp" + #include + #include +#include +#include namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirectRocSolverRf * @@ -16,9 +19,9 @@ namespace ReSolve */ LinSolverDirectRocSolverRf::LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace) { - workspace_ = workspace; - infoM_ = nullptr; - solve_mode_ = 1; //solve mode - 1: use rocsparse trisolve + workspace_ = workspace; + infoM_ = nullptr; + solve_mode_ = 1; // solve mode - 1: use rocsparse trisolve initParamList(); } @@ -56,20 +59,19 @@ namespace ReSolve int LinSolverDirectRocSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) + index_type* P, + index_type* Q, + vector_type* rhs) { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for rocsolverRf.\n"); - A_ = A; + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for rocsolverRf.\n"); + A_ = A; index_type n = A_->getNumRows(); - //set matrix info + // set matrix info rocsolver_create_rfinfo(&infoM_, workspace_->getRocblasHandle()); // Combine factors L and U into matrix M_ @@ -78,12 +80,14 @@ namespace ReSolve M_->setUpdated(ReSolve::memory::HOST); M_->syncData(ReSolve::memory::DEVICE); - //remember - P and Q are generally CPU variables - if (d_P_ == nullptr) { + // remember - P and Q are generally CPU variables + if (d_P_ == nullptr) + { mem_.allocateArrayOnDevice(&d_P_, n); } - if (d_Q_ == nullptr) { + if (d_Q_ == nullptr) + { 
mem_.allocateArrayOnDevice(&d_Q_, n); } mem_.copyArrayHostToDevice(d_P_, P, n); @@ -94,13 +98,13 @@ namespace ReSolve n, 1, A_->getNnz(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, d_P_, d_Q_, rhs->getData(ReSolve::memory::DEVICE), @@ -110,23 +114,25 @@ namespace ReSolve mem_.deviceSynchronize(); error_sum += status_rocblas_; // tri solve setup - if (solve_mode_ == 1) { // fast mode + if (solve_mode_ == 1) + { // fast mode - if (L_csr_ != nullptr) { + if (L_csr_ != nullptr) + { delete L_csr_; } L_csr_ = new ReSolve::matrix::Csr(L->getNumRows(), L->getNumColumns(), L->getNnz()); L_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - if (U_csr_ != nullptr) { + if (U_csr_ != nullptr) + { delete U_csr_; } U_csr_ = new ReSolve::matrix::Csr(U->getNumRows(), U->getNumColumns(), U->getNnz()); U_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - rocsparse_create_mat_descr(&(descr_L_)); rocsparse_set_mat_fill_mode(descr_L_, rocsparse_fill_mode_lower); rocsparse_set_mat_index_base(descr_L_, rocsparse_index_base_zero); @@ -147,10 +153,10 @@ namespace ReSolve M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), 
U_csr_->getColData(ReSolve::memory::DEVICE), U_csr_->getValues(ReSolve::memory::DEVICE)); @@ -167,9 +173,10 @@ namespace ReSolve L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, &L_buffer_size); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; mem_.allocateBufferOnDevice(&L_buffer_, L_buffer_size); + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, @@ -180,6 +187,7 @@ namespace ReSolve U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, &U_buffer_size); + error_sum += status_rocsparse_; mem_.allocateBufferOnDevice(&U_buffer_, U_buffer_size); @@ -195,34 +203,41 @@ namespace ReSolve rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, L_buffer_); + error_sum += status_rocsparse_; - if (status_rocsparse_!=0) { + if (status_rocsparse_ != 0) + { std::cout << "status after analysis 1: " << status_rocsparse_ << "\n"; } + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, U_csr_->getNnz(), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, U_buffer_); + error_sum += status_rocsparse_; - if (status_rocsparse_!=0) { + if (status_rocsparse_ != 0) + { out::error() << "status after analysis 2: " << status_rocsparse_ << "\n"; } - //allocate aux data - if (d_aux1_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux1_,n); + + // allocate aux data + if (d_aux1_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_aux1_, n); } - if (d_aux2_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux2_,n); + if (d_aux2_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_aux2_, n); } - } RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -238,41 +253,41 @@ namespace ReSolve RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum 
= 0; mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), - A_->getNumRows(), - A_->getNnz(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //OUTPUT, - d_P_, - d_Q_, - infoM_); + status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), + A_->getNumRows(), + A_->getNnz(), + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // OUTPUT, + d_P_, + d_Q_, + infoM_); mem_.deviceSynchronize(); error_sum += status_rocblas_; - if (solve_mode_ == 1) { - //split M, fill L and U with correct values + if (solve_mode_ == 1) + { + // split M, fill L and U with correct values status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), A_->getNumRows(), M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), U_csr_->getValues(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); error_sum += status_rocblas_; - } RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -289,22 +304,25 @@ namespace ReSolve { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; - if (solve_mode_ == 
0) { + if (solve_mode_ == 0) + { mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - rhs->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); + status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), + A_->getNumRows(), + 1, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), + d_P_, + d_Q_, + rhs->getData(ReSolve::memory::DEVICE), + A_->getNumRows(), + infoM_); mem_.deviceSynchronize(); - } else { + } + else + { // not implemented yet hip::permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); mem_.deviceSynchronize(); @@ -319,7 +337,7 @@ namespace ReSolve L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, - d_aux2_, //result + d_aux2_, // result rocsparse_solve_policy_auto, L_buffer_); error_sum += status_rocsparse_; @@ -334,13 +352,13 @@ namespace ReSolve U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, - d_aux2_, //input - d_aux1_, //result + d_aux2_, // input + d_aux1_, // result rocsparse_solve_policy_auto, U_buffer_); error_sum += status_rocsparse_; - hip::permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); + hip::permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } RESOLVE_RANGE_POP(__FUNCTION__); @@ -361,23 +379,27 @@ namespace ReSolve x->copyDataFrom(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; - if (solve_mode_ == 0) { + if (solve_mode_ == 0) + { mem_.deviceSynchronize(); - status_rocblas_ = 
rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - x->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); + status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), + A_->getNumRows(), + 1, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), + d_P_, + d_Q_, + x->getData(ReSolve::memory::DEVICE), + A_->getNumRows(), + infoM_); + error_sum += status_rocblas_; mem_.deviceSynchronize(); - } else { + } + else + { // not implemented yet hip::permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); @@ -389,12 +411,12 @@ namespace ReSolve L_csr_->getNnz(), &(constants::ONE), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, - d_aux2_, //result + d_aux2_, // result rocsparse_solve_policy_auto, L_buffer_); error_sum += status_rocsparse_; @@ -405,17 +427,17 @@ namespace ReSolve U_csr_->getNnz(), &(constants::ONE), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, - d_aux2_, //input - d_aux1_,//result + d_aux2_, // input + d_aux1_, // result rocsparse_solve_policy_auto, U_buffer_); error_sum += status_rocsparse_; - hip::permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); + hip::permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } RESOLVE_RANGE_POP(__FUNCTION__); @@ -458,17 +480,20 @@ namespace ReSolve { 
switch (getParamId(id)) { - case SOLVE_MODE: - if (value == "rocsparse_trisolve") { - // use rocsparse triangular solver - setSolveMode(1); - } else { - // use default - setSolveMode(0); - } - break; - default: - std::cout << "Setting parameter failed!\n"; + case SOLVE_MODE: + if (value == "rocsparse_trisolve") + { + // use rocsparse triangular solver + setSolveMode(1); + } + else + { + // use default + setSolveMode(0); + } + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -488,18 +513,18 @@ namespace ReSolve std::string value(""); switch (getParamId(id)) { - case SOLVE_MODE: - switch (getSolveMode()) - { - case 0: - value = "default"; - break; - case 1: - value = "rocsparse_trisolve"; - break; - } - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + case SOLVE_MODE: + switch (getSolveMode()) + { + case 0: + value = "default"; + break; + case 1: + value = "rocsparse_trisolve"; + break; + } + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return value; } @@ -518,8 +543,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -538,8 +563,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -558,15 +583,15 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } /** - * @brief Placeholder function that shouldn't be called. - */ + * @brief Placeholder function that shouldn't be called. 
+ */ int LinSolverDirectRocSolverRf::printCliParam(const std::string id) const { switch (getParamId(id)) @@ -596,57 +621,67 @@ namespace ReSolve void LinSolverDirectRocSolverRf::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { // L and U need to be in CSC format - index_type n = L->getNumRows(); + index_type n = L->getNumRows(); index_type* Lp = L->getColData(ReSolve::memory::HOST); index_type* Li = L->getRowData(ReSolve::memory::HOST); index_type* Up = U->getColData(ReSolve::memory::HOST); index_type* Ui = U->getRowData(ReSolve::memory::HOST); - if (M_ != nullptr) { + if (M_ != nullptr) + { delete M_; } - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); M_->allocateMatrixData(ReSolve::memory::DEVICE); M_->allocateMatrixData(ReSolve::memory::HOST); index_type* mia = M_->getRowData(ReSolve::memory::HOST); index_type* mja = M_->getColData(ReSolve::memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { + index_type row; + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; // BUT dont count diagonal twice, important - if(row != i) { + if (row != i) + { mia[row + 1]++; } } // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { row = Ui[j]; mia[row + 1]++; } } // then organize mia_; mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { + for (index_type i = 1; i < n + 1; i++) + { mia[i] += mia[i - 1]; } std::vector Mshifts(static_cast(n), 0); - for(index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + for (int j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; - if(row != i) { + if (row != i) + { // place (row, i) 
where it belongs! mja[mia[row] + Mshifts[static_cast(row)]] = i; Mshifts[static_cast(row)]++; } } // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { + row = Ui[j]; mja[mia[row] + Mshifts[static_cast(row)]] = i; Mshifts[static_cast(row)]++; } @@ -662,4 +697,4 @@ namespace ReSolve { params_list_["solve_mode"] = SOLVE_MODE; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.hpp b/resolve/LinSolverDirectRocSolverRf.hpp index 1dacc3d4b..d4702aeea 100644 --- a/resolve/LinSolverDirectRocSolverRf.hpp +++ b/resolve/LinSolverDirectRocSolverRf.hpp @@ -1,16 +1,16 @@ #pragma once -#include +#include #include #include -#include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -23,71 +23,75 @@ namespace ReSolve { class Sparse; class Csr; - } - - class LinSolverDirectRocSolverRf : public LinSolverDirect + } // namespace matrix + + class LinSolverDirectRocSolverRf : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace); - ~LinSolverDirectRocSolverRf(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) override; - - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution overwrites rhs - - int setSolveMode(int mode); // should probably be enum - int getSolveMode() const; //should be enum too - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool 
getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {SOLVE_MODE=0}; - int solve_mode_{0}; // 0 - default; 1 - use rocparse trisolver - - private: - // to be exported to matrix handler in a later time - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); //create L+U from separate L, U factors - void initParamList(); - - rocblas_status status_rocblas_; - rocsparse_status status_rocsparse_; - index_type* d_P_{nullptr}; - index_type* d_Q_{nullptr}; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceHIP* workspace_; - - rocsolver_rfinfo infoM_; - matrix::Sparse* M_{nullptr};//the matrix that contains added factors - - // not used by default - for fast solve - rocsparse_mat_descr descr_L_{nullptr}; - rocsparse_mat_descr descr_U_{nullptr}; - - rocsparse_mat_info info_L_{nullptr}; - rocsparse_mat_info info_U_{nullptr}; - - void* L_buffer_{nullptr}; - void* U_buffer_{nullptr}; - - ReSolve::matrix::Csr* L_csr_{nullptr}; - ReSolve::matrix::Csr* U_csr_{nullptr}; - - real_type* d_aux1_{nullptr}; - real_type* d_aux2_{nullptr}; + + public: + LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace); + ~LinSolverDirectRocSolverRf(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs) override; + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution overwrites rhs + + int setSolveMode(int mode); // should probably be enum + int getSolveMode() const; // should be enum too + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const 
override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + SOLVE_MODE = 0 + }; + + int solve_mode_{0}; // 0 - default; 1 - use rocparse trisolver + + private: + // to be exported to matrix handler in a later time + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); // create L+U from separate L, U factors + void initParamList(); + + rocblas_status status_rocblas_; + rocsparse_status status_rocsparse_; + index_type* d_P_{nullptr}; + index_type* d_Q_{nullptr}; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceHIP* workspace_; + + rocsolver_rfinfo infoM_; + matrix::Sparse* M_{nullptr}; // the matrix that contains added factors + + // not used by default - for fast solve + rocsparse_mat_descr descr_L_{nullptr}; + rocsparse_mat_descr descr_U_{nullptr}; + + rocsparse_mat_info info_L_{nullptr}; + rocsparse_mat_info info_U_{nullptr}; + + void* L_buffer_{nullptr}; + void* U_buffer_{nullptr}; + + ReSolve::matrix::Csr* L_csr_{nullptr}; + ReSolve::matrix::Csr* U_csr_{nullptr}; + + real_type* d_aux1_{nullptr}; + real_type* d_aux2_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSparseILU0.cpp b/resolve/LinSolverDirectRocSparseILU0.cpp index 479686419..8be374924 100644 --- a/resolve/LinSolverDirectRocSparseILU0.cpp +++ b/resolve/LinSolverDirectRocSparseILU0.cpp @@ -1,10 +1,10 @@ -#include -#include #include "LinSolverDirectRocSparseILU0.hpp" +#include #include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -24,19 +24,19 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - //remember - P and Q are generally CPU variables + // remember - P and Q are generally CPU variables int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); - 
mem_.allocateArrayOnDevice(&d_ILU_vals_,nnz); - //copy A values to a buffer first + mem_.allocateArrayOnDevice(&d_ILU_vals_, nnz); + // copy A values to a buffer first mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - //set up descriptors + // set up descriptors // Create matrix descriptor for A rocsparse_create_mat_descr(&descr_A_); @@ -58,150 +58,155 @@ namespace ReSolve size_t buffer_size_L; size_t buffer_size_U; - status_rocsparse_ = rocsparse_dcsrilu0_buffer_size(workspace_->getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0_buffer_size(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_A); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for ILU0 failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + error_sum += status_rocsparse_; + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_L); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for L solve failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + 
A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, &buffer_size_U); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for U solve failed with code: " <getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0_analysis(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "ILU0 decomposition analysis failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Solve analysis for L solve failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - rocsparse_analysis_policy_reuse, + rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Solve analysis 
for U solve failed with code: " <getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "ILU0 decomposition failed with code: " <A_ = A; - index_type n = A_->getNumRows(); + int error_sum = 0; + this->A_ = A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_solve_policy_auto, buffer_); @@ -210,41 +215,42 @@ namespace ReSolve return error_sum; } + // solution is returned in RHS int LinSolverDirectRocSparseILU0::solve(vector_type* rhs) { - int error_sum = 0; - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + int error_sum = 0; + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rhs->getData(ReSolve::memory::DEVICE), - d_aux1_, //result + d_aux1_, 
// result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - d_aux1_, //input - rhs->getData(ReSolve::memory::DEVICE),//result + d_aux1_, // input + rhs->getData(ReSolve::memory::DEVICE), // result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; rhs->setDataUpdated(ReSolve::memory::DEVICE); return error_sum; @@ -254,52 +260,50 @@ namespace ReSolve { int error_sum = 0; + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + A_->getNumRows(), + A_->getNnz(), + &(constants::ONE), + descr_L_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, + rhs->getData(ReSolve::memory::DEVICE), + d_aux1_, // result + rocsparse_solve_policy_auto, + buffer_); + error_sum += status_rocsparse_; - - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), - descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, - rhs->getData(ReSolve::memory::DEVICE), - d_aux1_, //result - rocsparse_solve_policy_auto, - buffer_); - error_sum += status_rocsparse_; - - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + status_rocsparse_ = 
rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - d_aux1_, //input - x->getData(ReSolve::memory::DEVICE),//result + d_aux1_, // input + x->getData(ReSolve::memory::DEVICE), // result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; - + error_sum += status_rocsparse_; + x->setDataUpdated(ReSolve::memory::DEVICE); return error_sum; } - + /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param[in] id - string ID for parameter to get. * @param[in] value unused/ignored * @return int Value of the int parameter to return. @@ -308,19 +312,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -328,19 +332,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. 
- * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -348,19 +352,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -368,19 +372,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. 
*/ @@ -388,8 +392,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -405,4 +409,4 @@ namespace ReSolve return 0; } -}// namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSparseILU0.hpp b/resolve/LinSolverDirectRocSparseILU0.hpp index 7b9f9f67e..1a18a14d4 100644 --- a/resolve/LinSolverDirectRocSparseILU0.hpp +++ b/resolve/LinSolverDirectRocSparseILU0.hpp @@ -1,16 +1,16 @@ #pragma once -#include -#include #include +#include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -24,49 +24,49 @@ namespace ReSolve class Sparse; } - class LinSolverDirectRocSparseILU0 : public LinSolverDirect + class LinSolverDirectRocSparseILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectRocSparseILU0(LinAlgWorkspaceHIP* workspace); - ~LinSolverDirectRocSparseILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const 
std::string id) const override; - - private: - rocsparse_status status_rocsparse_; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceHIP* workspace_{nullptr}; - - rocsparse_mat_descr descr_A_{nullptr}; - rocsparse_mat_descr descr_L_{nullptr}; - rocsparse_mat_descr descr_U_{nullptr}; - - rocsparse_mat_info info_A_{nullptr}; - - void* buffer_{nullptr}; - - real_type* d_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. - real_type* d_ILU_vals_{nullptr}; + + public: + LinSolverDirectRocSparseILU0(LinAlgWorkspaceHIP* workspace); + ~LinSolverDirectRocSparseILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + rocsparse_status status_rocsparse_; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceHIP* workspace_{nullptr}; + + rocsparse_mat_descr descr_A_{nullptr}; + rocsparse_mat_descr descr_L_{nullptr}; + rocsparse_mat_descr descr_U_{nullptr}; + + rocsparse_mat_info info_A_{nullptr}; + + void* buffer_{nullptr}; + + real_type* d_aux1_{nullptr}; + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of 
ILU decomposition. + real_type* d_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.cpp b/resolve/LinSolverDirectSerialILU0.cpp index 930bbae2f..2f22dcb69 100644 --- a/resolve/LinSolverDirectSerialILU0.cpp +++ b/resolve/LinSolverDirectSerialILU0.cpp @@ -1,11 +1,12 @@ -#include -#include -#include #include "LinSolverDirectSerialILU0.hpp" +#include + +#include #include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -16,14 +17,15 @@ namespace ReSolve LinSolverDirectSerialILU0::~LinSolverDirectSerialILU0() { - if (owns_factors_) { + if (owns_factors_) + { delete L_; delete U_; L_ = nullptr; U_ = nullptr; } - delete [] h_aux1_; - delete [] h_ILU_vals_; + delete[] h_aux1_; + delete[] h_ILU_vals_; } int LinSolverDirectSerialILU0::setup(matrix::Sparse* A, @@ -31,19 +33,20 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); h_ILU_vals_ = new real_type[nnz]; - h_aux1_ = new real_type[n]; + h_aux1_ = new real_type[n]; - index_type zero_pivot = 0; // no zero pivot + index_type zero_pivot = 0; // no zero pivot - //copy A values to a buffer first - for (index_type i = 0; i < nnz; ++i) { + // copy A values to a buffer first + for (index_type i = 0; i < nnz; ++i) + { h_ILU_vals_[i] = A_->getValues(ReSolve::memory::HOST)[i]; } @@ -52,89 +55,107 @@ namespace ReSolve index_type* ja_mapper = new index_type[n]; // aux scalars for indexing etc - index_type k, j, jw, j1, j2; + index_type k, j, jw, j1, j2; - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { 
ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = j; } - // IJK ILU + // IJK ILU j = j1; - while ( j < j2) { + while (j < j2) + { k = A_->getColData(ReSolve::memory::HOST)[j]; - if (k < i) { + if (k < i) + { h_ILU_vals_[j] /= h_ILU_vals_[u_ptr[k]]; - for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) { + for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) + { jw = ja_mapper[A_->getColData(ReSolve::memory::HOST)[jj]]; - if (jw != 0) { - h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; + if (jw != 0) + { + h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; } - } - } else { + } + } + else + { break; } - j++; + j++; } u_ptr[i] = j; - if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) { + if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) + { zero_pivot = -1; // zero pivot is in place (i,i) on the diagonal return zero_pivot; } // reset mapper - for (index_type j = j1; j< j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = 0; } } - //clean up - delete [] ja_mapper; - delete [] u_ptr; + // clean up + delete[] ja_mapper; + delete[] u_ptr; // split into L and U! 
index_type nnzL = 0, nnzU = 0; // the diagonal values GO TO U, L has 1s on the diagonal - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) { - if (A->getColData(ReSolve::memory::HOST)[j] == i) { + for (index_type j = j1; j < j2; ++j) + { + if (A->getColData(ReSolve::memory::HOST)[j] == i) + { // diagonal, add to both nnzL++; nnzU++; } - if (A->getColData(ReSolve::memory::HOST)[j] > i) { + if (A->getColData(ReSolve::memory::HOST)[j] > i) + { // upper part nnzU++; } - if (A->getColData(ReSolve::memory::HOST)[j] < i) { + if (A->getColData(ReSolve::memory::HOST)[j] < i) + { // lower part nnzL++; } - } + } } // TODO: What is the purpose of nnzL and nnzU if they are not used after this? // allocate L and U - L_ = new matrix::Csr(n, n, nnzL, false, true); - U_ = new matrix::Csr(n, n, nnzU, false, true); + L_ = new matrix::Csr(n, n, nnzL, false, true); + U_ = new matrix::Csr(n, n, nnzU, false, true); owns_factors_ = true; - L_->allocateMatrixData(ReSolve::memory::HOST); - U_->allocateMatrixData(ReSolve::memory::HOST); - index_type lit = 0, uit = 0, kL, kU; + L_->allocateMatrixData(ReSolve::memory::HOST); + U_->allocateMatrixData(ReSolve::memory::HOST); + index_type lit = 0, uit = 0, kL, kU; L_->getRowData(ReSolve::memory::HOST)[0] = 0; U_->getRowData(ReSolve::memory::HOST)[0] = 0; - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; kL = 0; kU = 0; - for (index_type j = j1; j < j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { - if (A->getColData(ReSolve::memory::HOST)[j] == i) { + if (A->getColData(ReSolve::memory::HOST)[j] == i) + { // diagonal, add to both L_->getValues(ReSolve::memory::HOST)[lit] = 1.0; @@ -149,30 +170,33 @@ namespace ReSolve kU++; } - if 
(A->getColData(ReSolve::memory::HOST)[j] > i) { + if (A->getColData(ReSolve::memory::HOST)[j] > i) + { // upper part - U_->getValues(ReSolve::memory::HOST) [uit] = h_ILU_vals_[j]; - U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; ; + U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; + U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; + ; uit++; kU++; } - if (A->getColData(ReSolve::memory::HOST)[j] < i) { + if (A->getColData(ReSolve::memory::HOST)[j] < i) + { // lower part - L_->getValues(ReSolve::memory::HOST) [lit] = h_ILU_vals_[j]; - L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; + L_->getValues(ReSolve::memory::HOST)[lit] = h_ILU_vals_[j]; + L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; lit++; kL++; } - } + } // update row pointers (offsets) - L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; - U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; + L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; + U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; } - + return zero_pivot; } @@ -180,30 +204,35 @@ namespace ReSolve { return this->setup(A); } + // solution is returned in RHS int LinSolverDirectSerialILU0::solve(vector_type* rhs) { int error_sum = 0; // printf("solve t 1\n"); // h_aux1 = L^{-1} rhs - for (index_type i = 0; i < L_->getNumRows(); ++i) { + for (index_type i = 0; i < L_->getNumRows(); ++i) + { h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; + for (index_type j 
= L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) + { + index_type col = L_->getColData(ReSolve::memory::HOST)[j]; + h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; } h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; } // rhs = U^{-1} h_aux1 - for (index_type i = A_->getNumRows() - 1; i >= 0; --i) { + for (index_type i = A_->getNumRows() - 1; i >= 0; --i) + { rhs->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; + for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) + { + index_type col = U_->getColData(ReSolve::memory::HOST)[j]; rhs->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * rhs->getData(ReSolve::memory::HOST)[col]; } - rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; //divide by the diagonal entry + rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry } return error_sum; @@ -211,41 +240,45 @@ namespace ReSolve int LinSolverDirectSerialILU0::solve(vector_type* rhs, vector_type* x) { - //printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); + // printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); int error_sum = 0; // h_aux1 = L^{-1} rhs - //for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); - for (index_type i = 0; i < L_->getNumRows(); ++i) { + // for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); + for 
(index_type i = 0; i < L_->getNumRows(); ++i) + { h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; + for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) + { + index_type col = L_->getColData(ReSolve::memory::HOST)[j]; + h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; } h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; } - //for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, h_aux1_[ii]); - // x = U^{-1} h_aux1 + // for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, h_aux1_[ii]); + // x = U^{-1} h_aux1 - for (index_type i = U_->getNumRows() - 1; i >= 0; --i) { + for (index_type i = U_->getNumRows() - 1; i >= 0; --i) + { x->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; + for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) + { + index_type col = U_->getColData(ReSolve::memory::HOST)[j]; + x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; } - x->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; //divide by the diagonal entry + x->getData(ReSolve::memory::HOST)[i] /= 
U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry } - //for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); - return error_sum; + // for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); + return error_sum; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -253,19 +286,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -273,19 +306,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. 
*/ @@ -293,19 +326,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -313,19 +346,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. 
*/ @@ -333,8 +366,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -350,4 +383,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.hpp b/resolve/LinSolverDirectSerialILU0.hpp index 7dc4914ac..64b96632a 100644 --- a/resolve/LinSolverDirectSerialILU0.hpp +++ b/resolve/LinSolverDirectSerialILU0.hpp @@ -2,11 +2,11 @@ #pragma once #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -20,41 +20,40 @@ namespace ReSolve class Sparse; } - class LinSolverDirectSerialILU0 : public LinSolverDirect + class LinSolverDirectSerialILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); - ~LinSolverDirectSerialILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - - // MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool owns_factors_{false}; 
///< If the class owns L and U factors - - real_type* h_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep - // the values of ILU decomposition. - real_type* h_ILU_vals_{nullptr}; + + public: + LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); + ~LinSolverDirectSerialILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + // MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceCpu* workspace_{nullptr}; + bool owns_factors_{false}; ///< If the class owns L and U factors + + real_type* h_aux1_{nullptr}; + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep + // the values of ILU decomposition. + real_type* h_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverIterative.cpp b/resolve/LinSolverIterative.cpp index ed1b97ccb..c24af8f76 100644 --- a/resolve/LinSolverIterative.cpp +++ b/resolve/LinSolverIterative.cpp @@ -3,30 +3,29 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of iterative solver base class. 
- * + * */ -#include -#include #include #include +#include +#include - -namespace ReSolve +namespace ReSolve { using out = io::Logger; - LinSolverIterative::LinSolverIterative() { } - + LinSolverIterative::~LinSolverIterative() { } int LinSolverIterative::setup(matrix::Sparse* A) { - if (A == nullptr) { + if (A == nullptr) + { return 1; } this->A_ = A; @@ -48,7 +47,6 @@ namespace ReSolve return total_iters_; } - real_type LinSolverIterative::getTol() const { return tol_; @@ -65,16 +63,13 @@ namespace ReSolve return 1; } - void LinSolverIterative::setTol(real_type new_tol) + void LinSolverIterative::setTol(real_type new_tol) { this->tol_ = new_tol; } - void LinSolverIterative::setMaxit(index_type new_maxit) + void LinSolverIterative::setMaxit(index_type new_maxit) { this->maxit_ = new_maxit; } -} - - - +} // namespace ReSolve diff --git a/resolve/LinSolverIterative.hpp b/resolve/LinSolverIterative.hpp index 4b9ee5f97..3cd1ef2ed 100644 --- a/resolve/LinSolverIterative.hpp +++ b/resolve/LinSolverIterative.hpp @@ -3,47 +3,49 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of iterative solver base class. 
- * + * */ #pragma once #include + #include -namespace ReSolve +namespace ReSolve { class GramSchmidt; + class LinSolverDirect; - class LinSolverIterative : public LinSolver + class LinSolverIterative : public LinSolver { - public: - LinSolverIterative(); - virtual ~LinSolverIterative(); - virtual int setup(matrix::Sparse* A); - virtual int resetMatrix(matrix::Sparse* A) = 0; - virtual int setupPreconditioner(std::string type, LinSolverDirect* LU_solver) = 0; + public: + LinSolverIterative(); + virtual ~LinSolverIterative(); + virtual int setup(matrix::Sparse* A); + virtual int resetMatrix(matrix::Sparse* A) = 0; + virtual int setupPreconditioner(std::string type, LinSolverDirect* LU_solver) = 0; - virtual int solve(vector_type* rhs, vector_type* init_guess) = 0; + virtual int solve(vector_type* rhs, vector_type* init_guess) = 0; - virtual real_type getFinalResidualNorm() const; - virtual real_type getInitResidualNorm() const; - virtual index_type getNumIter() const; + virtual real_type getFinalResidualNorm() const; + virtual real_type getInitResidualNorm() const; + virtual index_type getNumIter() const; - virtual int setOrthogonalization(GramSchmidt* gs); + virtual int setOrthogonalization(GramSchmidt* gs); - real_type getTol() const; - index_type getMaxit() const; + real_type getTol() const; + index_type getMaxit() const; - void setTol(real_type new_tol); - void setMaxit(index_type new_maxit); + void setTol(real_type new_tol); + void setMaxit(index_type new_maxit); - protected: - real_type initial_residual_norm_; - real_type final_residual_norm_; - index_type total_iters_; + protected: + real_type initial_residual_norm_; + real_type final_residual_norm_; + index_type total_iters_; - // Parameters common for all iterative solvers - real_type tol_{1e-14}; ///< Solver tolerance - index_type maxit_{100}; ///< Maximum solver iterations + // Parameters common for all iterative solvers + real_type tol_{1e-14}; ///< Solver tolerance + index_type maxit_{100}; ///< Maximum 
solver iterations }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 7096384c6..08fda3977 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -4,14 +4,19 @@ * @brief Implementation of LinSolverIterativeFGMRES class * */ -#include +#include "LinSolverIterativeFGMRES.hpp" + #include #include #include +#include -#include +#include #include -#include "LinSolverIterativeFGMRES.hpp" +#include +#include +#include +#include namespace ReSolve { @@ -23,7 +28,7 @@ namespace ReSolve { matrix_handler_ = matrix_handler; vector_handler_ = vector_handler; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); } @@ -37,22 +42,23 @@ namespace ReSolve GramSchmidt* gs) { // Base class settings here (to be removed when solver parameter settings are implemented) - tol_ = tol; - maxit_= maxit; - restart_ = restart; + tol_ = tol; + maxit_ = maxit; + restart_ = restart; conv_cond_ = conv_cond; - flexible_ = true; + flexible_ = true; matrix_handler_ = matrix_handler; vector_handler_ = vector_handler; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); } LinSolverIterativeFGMRES::~LinSolverIterativeFGMRES() { - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); } } @@ -70,8 +76,10 @@ namespace ReSolve int LinSolverIterativeFGMRES::setup(matrix::Sparse* A) { // If A_ is already set, then report error and exit. - if (n_ != A->getNumRows()) { - if (is_solver_set_) { + if (n_ != A->getNumRows()) + { + if (is_solver_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSolverData(); is_solver_set_ = false; @@ -83,7 +91,8 @@ namespace ReSolve n_ = A->getNumRows(); // Allocate solver data. 
- if (!is_solver_set_) { + if (!is_solver_set_) + { allocateSolverData(); is_solver_set_ = true; } @@ -93,28 +102,28 @@ namespace ReSolve return 0; } - int LinSolverIterativeFGMRES::solve(vector_type* rhs, vector_type* x) + int LinSolverIterativeFGMRES::solve(vector_type* rhs, vector_type* x) { using namespace constants; - //io::Logger::setVerbosity(io::Logger::EVERYTHING); + // io::Logger::setVerbosity(io::Logger::EVERYTHING); int outer_flag = 1; - int notconv = 1; - int i = 0; - int it = 0; - int j = 0; - int k = 0; - int k1 = 0; - - real_type t = 0.0; - real_type rnorm = 0.0; - real_type bnorm = 0.0; - real_type tolrel; + int notconv = 1; + int i = 0; + int it = 0; + int j = 0; + int k = 0; + int k1 = 0; + + real_type t = 0.0; + real_type rnorm = 0.0; + real_type bnorm = 0.0; + real_type tolrel; vector_type vec_v(n_); vector_type vec_z(n_); - //V[0] = b-A*x_0 - //debug + // V[0] = b-A*x_0 + // debug vec_Z_->setToZero(memspace_); vec_V_->setToZero(memspace_); @@ -123,18 +132,21 @@ namespace ReSolve rnorm = 0.0; bnorm = vector_handler_->dot(rhs, rhs, memspace_); rnorm = vector_handler_->dot(vec_V_, vec_V_, memspace_); - //rnorm = ||V_1|| + // rnorm = ||V_1|| rnorm = std::sqrt(rnorm); bnorm = std::sqrt(bnorm); io::Logger::misc() << "it 0: norm of residual " << std::scientific << std::setprecision(16) << rnorm << " Norm of rhs: " << bnorm << "\n"; initial_residual_norm_ = rnorm; - while(outer_flag) { + while (outer_flag) + { // check if maybe residual is already small enough? 
- if (it == 0) { + if (it == 0) + { tolrel = tol_ * rnorm; - if (std::abs(tolrel) < MACHINE_EPSILON) { + if (std::abs(tolrel) < MACHINE_EPSILON) + { tolrel = MACHINE_EPSILON; } } @@ -142,22 +154,23 @@ namespace ReSolve bool exit_cond = false; switch (conv_cond_) { - case 0: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); - break; - case 1: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); - break; - case 2: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_*bnorm))); - break; + case 0: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); + break; + case 1: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); + break; + case 2: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_ * bnorm))); + break; } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; + if (exit_cond) + { + outer_flag = 0; + final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - total_iters_ = 0; + total_iters_ = 0; break; } @@ -166,55 +179,62 @@ namespace ReSolve vector_handler_->scal(&t, vec_V_, memspace_); // initialize norm history h_rs_[0] = rnorm; - i = -1; - notconv = 1; + i = -1; + notconv = 1; - while((notconv) && (it < maxit_)) { + while ((notconv) && (it < maxit_)) + { i++; it++; // Z_i = (LU)^{-1}*V_i - vec_v.setData( vec_V_->getData(i, memspace_), memspace_); - if (flexible_) { - vec_z.setData( vec_Z_->getData(i, memspace_), memspace_); - } else { - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); + vec_v.setData(vec_V_->getData(i, memspace_), memspace_); + if (flexible_) + { + vec_z.setData(vec_Z_->getData(i, memspace_), memspace_); + } + else + { + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); } this->precV(&vec_v, &vec_z); mem_.deviceSynchronize(); // V_{i+1}=A*Z_i - vec_v.setData( vec_V_->getData(i + 1, memspace_), memspace_); + vec_v.setData(vec_V_->getData(i + 1, memspace_), memspace_); 
matrix_handler_->matvec(A_, &vec_z, &vec_v, &ONE, &ZERO, memspace_); // orthogonalize V[i+1], form a column of h_H_ GS_->orthogonalize(n_, vec_V_, h_H_, i); - if (i != 0) { - for (index_type k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; + if (i != 0) + { + for (index_type k = 1; k <= i; k++) + { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; } } // if i!=0 - real_type Hii = h_H_[i * (restart_ + 1) + i]; + real_type Hii = h_H_[i * (restart_ + 1) + i]; real_type Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); + real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); - if(std::abs(gam - ZERO) <= MACHINE_EPSILON) { + if (std::abs(gam - ZERO) <= MACHINE_EPSILON) + { gam = MACHINE_EPSILON; } /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; // residual norm estimate @@ -223,7 +243,8 @@ namespace ReSolve << std::scientific << std::setprecision(16) << rnorm << "\n"; // check convergence - if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) + { notconv = 0; } } // inner while @@ -233,32 +254,39 @@ namespace ReSolve << rnorm << "\n"; // solve tri system h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for(int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; + for (int ii = 2; ii <= i + 1; 
ii++) + { + k = i - ii + 1; k1 = k + 1; - t = h_rs_[k]; - for (j = k1; j <= i; j++) { + t = h_rs_[k]; + for (j = k1; j <= i; j++) + { t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; } h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; } // get solution - if (flexible_) { - for (j = 0; j <= i; j++) { - vec_z.setData( vec_Z_->getData(j, memspace_), memspace_); + if (flexible_) + { + for (j = 0; j <= i; j++) + { + vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); } - } else { + } + else + { vec_Z_->setToZero(memspace_); - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); - for (j = 0; j <= i; j++) { - vec_v.setData( vec_V_->getData(j, memspace_), memspace_); + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); + for (j = 0; j <= i; j++) + { + vec_v.setData(vec_V_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon - vec_v.setData( vec_V_->getData(memspace_), memspace_); + vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x vector_handler_->axpy(&ONE, &vec_v, x, memspace_); @@ -266,7 +294,8 @@ namespace ReSolve /* test solution */ - if(rnorm <= tolrel || it >= maxit_) { + if (rnorm <= tolrel || it >= maxit_) + { // rnorm_aux = rnorm; outer_flag = 0; } @@ -277,9 +306,10 @@ namespace ReSolve // rnorm = ||V_1|| rnorm = std::sqrt(rnorm); - if(!outer_flag) { + if (!outer_flag) + { final_residual_norm_ = rnorm; - total_iters_ = it; + total_iters_ = it; io::Logger::misc() << "End of cycle, COMPUTED norm of residual " << std::scientific << std::setprecision(16) << rnorm << "\n"; @@ -288,19 +318,21 @@ namespace ReSolve return 0; } - int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) + int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) { - if (type != "LU") { + if (type != "LU") + { out::warning() << 
"Only LU-type solve can be used as a preconditioner at this time." << std::endl; return 1; - } else { + } + else + { LU_solver_ = LU_solver; return 0; } - } - int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) + int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; matrix_handler_->setValuesChanged(true, memspace_); @@ -333,7 +365,8 @@ namespace ReSolve int LinSolverIterativeFGMRES::setRestart(index_type restart) { // If the new restart value is the same as the old, do nothing. - if (restart_ == restart) { + if (restart_ == restart) + { return 0; } @@ -341,7 +374,8 @@ namespace ReSolve restart_ = restart; // If solver is already set, reallocate solver data - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); allocateSolverData(); } @@ -351,7 +385,7 @@ namespace ReSolve // If Gram-Schmidt is already set, we need to reallocate it since the // restart value has changed. // if (GS_->isSetupComplete()) { - GS_->setup(n_, restart_); + GS_->setup(n_, restart_); // } return 0; @@ -366,11 +400,15 @@ namespace ReSolve int LinSolverIterativeFGMRES::setFlexible(bool is_flexible) { // TODO: Add vector method resize - if (vec_Z_) { + if (vec_Z_) + { delete vec_Z_; - if (is_flexible) { + if (is_flexible) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just a one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } @@ -393,43 +431,42 @@ namespace ReSolve return 0; } - index_type LinSolverIterativeFGMRES::getRestart() const + index_type LinSolverIterativeFGMRES::getRestart() const { return restart_; } - index_type LinSolverIterativeFGMRES::getConvCond() const + index_type LinSolverIterativeFGMRES::getConvCond() const { return conv_cond_; } - bool LinSolverIterativeFGMRES::getFlexible() const + bool LinSolverIterativeFGMRES::getFlexible() const { return flexible_; } - int LinSolverIterativeFGMRES::setCliParam(const std::string id, const std::string 
value) { switch (getParamId(id)) { - case TOL: - setTol(atof(value.c_str())); - break; - case MAXIT: - setMaxit(atoi(value.c_str())); - break; - case RESTART: - setRestart(atoi(value.c_str())); - break; - case CONV_COND: - setConvergenceCondition(atoi(value.c_str())); - break; - case FLEXIBLE: - setFlexible(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case TOL: + setTol(atof(value.c_str())); + break; + case MAXIT: + setMaxit(atoi(value.c_str())); + break; + case RESTART: + setRestart(atoi(value.c_str())); + break; + case CONV_COND: + setConvergenceCondition(atoi(value.c_str())); + break; + case FLEXIBLE: + setFlexible(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -438,8 +475,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -448,17 +485,17 @@ namespace ReSolve { switch (getParamId(id)) { - case MAXIT: - return getMaxit(); - break; - case RESTART: - return getRestart(); - break; - case CONV_COND: - return getConvCond(); - break; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case MAXIT: + return getMaxit(); + break; + case RESTART: + return getRestart(); + break; + case CONV_COND: + return getConvCond(); + break; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -467,11 +504,11 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - return getTol(); - break; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case TOL: + return getTol(); + break; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -480,11 +517,11 @@ namespace ReSolve { switch (getParamId(id)) { - case 
FLEXIBLE: - return getFlexible(); - break; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case FLEXIBLE: + return getFlexible(); + break; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -523,36 +560,39 @@ namespace ReSolve { vec_V_ = new vector_type(n_, restart_ + 1); vec_V_->allocate(memspace_); - if (flexible_) { + if (flexible_) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just a one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } vec_Z_->allocate(memspace_); h_H_ = new real_type[restart_ * (restart_ + 1)]; - h_c_ = new real_type[restart_]; // needed for givens - h_s_ = new real_type[restart_]; // same - h_rs_ = new real_type[restart_ + 1]; // for residual norm history + h_c_ = new real_type[restart_]; // needed for givens + h_s_ = new real_type[restart_]; // same + h_rs_ = new real_type[restart_ + 1]; // for residual norm history return 0; } int LinSolverIterativeFGMRES::freeSolverData() { - delete [] h_H_ ; - delete [] h_c_ ; - delete [] h_s_ ; - delete [] h_rs_; + delete[] h_H_; + delete[] h_c_; + delete[] h_s_; + delete[] h_rs_; delete vec_V_; delete vec_Z_; - h_H_ = nullptr; - h_c_ = nullptr; - h_s_ = nullptr; - h_rs_ = nullptr; - vec_V_ = nullptr; - vec_Z_ = nullptr; + h_H_ = nullptr; + h_c_ = nullptr; + h_s_ = nullptr; + h_rs_ = nullptr; + vec_V_ = nullptr; + vec_Z_ = nullptr; return 0; } @@ -569,14 +609,17 @@ namespace ReSolve bool is_vector_handler_cuda = matrix_handler_->getIsCudaEnabled(); bool is_vector_handler_hip = matrix_handler_->getIsHipEnabled(); - if ((is_matrix_handler_cuda != is_vector_handler_cuda) || - (is_matrix_handler_hip != is_vector_handler_hip )) { + if ((is_matrix_handler_cuda != is_vector_handler_cuda) || (is_matrix_handler_hip != is_vector_handler_hip)) + { out::error() << "Matrix and vector handler backends are incompatible!\n"; } - if 
(is_matrix_handler_cuda || is_matrix_handler_hip) { + if (is_matrix_handler_cuda || is_matrix_handler_hip) + { memspace_ = memory::DEVICE; - } else { + } + else + { memspace_ = memory::HOST; } } @@ -590,4 +633,4 @@ namespace ReSolve params_list_["flexible"] = FLEXIBLE; } -} // namespace +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.hpp b/resolve/LinSolverIterativeFGMRES.hpp index cfdd6c442..f0d3aa66f 100644 --- a/resolve/LinSolverIterativeFGMRES.hpp +++ b/resolve/LinSolverIterativeFGMRES.hpp @@ -2,24 +2,36 @@ * @file LinSolverIterativeFGMRES.hpp * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Declaration of LinSolverIterativeFGMRES class - * + * */ #pragma once #include "Common.hpp" -#include -#include -#include "GramSchmidt.hpp" #include #include +#include -namespace ReSolve +namespace ReSolve { + // Forward declarations + class SketchingHandler; + class GramSchmidt; + + namespace matrix + { + class Sparse; + } + + namespace vector + { + class Vector; + } + /** * @brief (F)GMRES solver - * + * * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) - * + * * @note MatrixHandler and VectorHandler objects are inherited from * LinSolver base class. 
*/ @@ -27,68 +39,75 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverIterativeFGMRES(MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs); - LinSolverIterativeFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs); - ~LinSolverIterativeFGMRES(); - - int solve(vector_type* rhs, vector_type* x) override; - int setup(matrix::Sparse* A) override; - int resetMatrix(matrix::Sparse* new_A) override; - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; - int setOrthogonalization(GramSchmidt* gs) override; - - int setRestart(index_type restart); - int setFlexible(bool is_flexible); - int setConvergenceCondition(index_type conv_cond); - index_type getRestart() const; - index_type getConvCond() const; - bool getFlexible() const; - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {TOL=0, MAXIT, RESTART, CONV_COND, FLEXIBLE}; - - index_type restart_{10}; ///< GMRES restart - index_type conv_cond_{0}; ///< GMRES convergence condition - bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm - - private: - int allocateSolverData(); - int freeSolverData(); - void setMemorySpace(); - void initParamList(); - void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner - - memory::MemorySpace memspace_; - - vector_type* vec_V_{nullptr}; - vector_type* vec_Z_{nullptr}; - - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; - 
- GramSchmidt* GS_{nullptr}; - LinSolverDirect* LU_solver_{nullptr}; - index_type n_{0}; - bool is_solver_set_{false}; - - MemoryHandler mem_; ///< Device memory manager object + public: + LinSolverIterativeFGMRES(MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + GramSchmidt* gs); + LinSolverIterativeFGMRES(index_type restart, + real_type tol, + index_type maxit, + index_type conv_cond, + MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + GramSchmidt* gs); + ~LinSolverIterativeFGMRES(); + + int solve(vector_type* rhs, vector_type* x) override; + int setup(matrix::Sparse* A) override; + int resetMatrix(matrix::Sparse* new_A) override; + int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; + int setOrthogonalization(GramSchmidt* gs) override; + + int setRestart(index_type restart); + int setFlexible(bool is_flexible); + int setConvergenceCondition(index_type conv_cond); + index_type getRestart() const; + index_type getConvCond() const; + bool getFlexible() const; + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + TOL = 0, + MAXIT, + RESTART, + CONV_COND, + FLEXIBLE + }; + + index_type restart_{10}; ///< GMRES restart + index_type conv_cond_{0}; ///< GMRES convergence condition + bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm + + private: + int allocateSolverData(); + int freeSolverData(); + void setMemorySpace(); + void initParamList(); + void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner + + memory::MemorySpace memspace_; + + vector_type* vec_V_{nullptr}; + vector_type* vec_Z_{nullptr}; 
+ + real_type* h_H_{nullptr}; + real_type* h_c_{nullptr}; + real_type* h_s_{nullptr}; + real_type* h_rs_{nullptr}; + + GramSchmidt* GS_{nullptr}; + LinSolverDirect* LU_solver_{nullptr}; + index_type n_{0}; + bool is_solver_set_{false}; + + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 4c1189cb6..94cce54f6 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -4,19 +4,20 @@ * @brief Implementation of LinSolverIterativeRandFGMRES class. * */ -#include +#include "LinSolverIterativeRandFGMRES.hpp" + #include #include -#include #include +#include +#include -#include -#include -#include #include #include +#include #include -#include "LinSolverIterativeRandFGMRES.hpp" +#include +#include namespace ReSolve { @@ -27,16 +28,16 @@ namespace ReSolve SketchingMethod rand_method, GramSchmidt* gs) { - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - flexible_ = true; - - matrix_handler_ = matrix_handler; - vector_handler_ = vector_handler; + tol_ = 1e-14; // default + maxit_ = 100; // default + restart_ = 10; + conv_cond_ = 0; // default + flexible_ = true; + + matrix_handler_ = matrix_handler; + vector_handler_ = vector_handler; sketching_method_ = rand_method; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); @@ -51,16 +52,16 @@ namespace ReSolve SketchingMethod rand_method, GramSchmidt* gs) { - tol_ = tol; - maxit_= maxit; - restart_ = restart; + tol_ = tol; + maxit_ = maxit; + restart_ = restart; conv_cond_ = conv_cond; - flexible_ = true; + flexible_ = true; - matrix_handler_ = matrix_handler; - vector_handler_ = vector_handler; + matrix_handler_ = matrix_handler; + vector_handler_ = vector_handler; sketching_method_ = rand_method; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); @@ -68,11 +69,13 @@ namespace ReSolve 
LinSolverIterativeRandFGMRES::~LinSolverIterativeRandFGMRES() { - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); } - if (is_sketching_set_) { + if (is_sketching_set_) + { freeSketchingData(); } } @@ -86,13 +89,16 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setup(matrix::Sparse* A) { // If A_ is already set, then report error and exit. - if (n_ != A->getNumRows()) { - if (is_solver_set_) { + if (n_ != A->getNumRows()) + { + if (is_solver_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSolverData(); } - if (is_sketching_set_) { + if (is_sketching_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSketchingData(); } @@ -101,11 +107,13 @@ namespace ReSolve A_ = A; n_ = A_->getNumRows(); - if (!is_solver_set_) { + if (!is_solver_set_) + { allocateSolverData(); } - if (!is_sketching_set_) { + if (!is_sketching_set_) + { allocateSketchingData(); } @@ -124,29 +132,29 @@ namespace ReSolve * @invariant rhs vector is unchanged. * @post x is overwritten with the solution to the linear system. 
*/ - int LinSolverIterativeRandFGMRES::solve(vector_type* rhs, vector_type* x) + int LinSolverIterativeRandFGMRES::solve(vector_type* rhs, vector_type* x) { using namespace constants; // io::Logger::setVerbosity(io::Logger::EVERYTHING); - int outer_flag = 1; - int notconv = 1; - index_type i = 0; - int it = 0; - int j; - int k; - int k1; - - real_type t; - real_type rnorm; - real_type bnorm; - real_type tolrel; + int outer_flag = 1; + int notconv = 1; + index_type i = 0; + int it = 0; + int j; + int k; + int k1; + + real_type t; + real_type rnorm; + real_type bnorm; + real_type tolrel; vector_type vec_v(n_); vector_type vec_z(n_); vector_type vec_s(k_rand_); - //V[0] = b-A*x_0 - //debug + // V[0] = b-A*x_0 + // debug vec_Z_->setToZero(memspace_); vec_V_->setToZero(memspace_); @@ -158,7 +166,8 @@ namespace ReSolve sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -172,11 +181,14 @@ namespace ReSolve << std::scientific << std::setprecision(16) << rnorm << " Norm of rhs: " << bnorm << "\n"; initial_residual_norm_ = rnorm; - while(outer_flag) { + while (outer_flag) + { // check if maybe residual is already small enough? 
- if (it == 0) { + if (it == 0) + { tolrel = tol_ * rnorm; - if (std::abs(tolrel) < MACHINE_EPSILON) { + if (std::abs(tolrel) < MACHINE_EPSILON) + { tolrel = MACHINE_EPSILON; } } @@ -184,22 +196,23 @@ namespace ReSolve bool exit_cond = false; switch (conv_cond_) { - case 0: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); - break; - case 1: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); - break; - case 2: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_*bnorm))); - break; + case 0: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); + break; + case 1: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); + break; + case 2: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_ * bnorm))); + break; } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; + if (exit_cond) + { + outer_flag = 0; + final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - total_iters_ = 0; + total_iters_ = 0; break; } @@ -212,18 +225,22 @@ namespace ReSolve // initialize norm history h_rs_[0] = rnorm; - i = -1; - notconv = 1; + i = -1; + notconv = 1; - while((notconv) && (it < maxit_)) { + while ((notconv) && (it < maxit_)) + { i++; it++; // Z_i = (LU)^{-1}*V_i vec_v.setData(vec_V_->getData(i, memspace_), memspace_); - if (flexible_) { + if (flexible_) + { vec_z.setData(vec_Z_->getData(i, memspace_), memspace_); - } else { + } + else + { vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); } this->precV(&vec_v, &vec_z); @@ -239,7 +256,8 @@ namespace ReSolve // this is where it differs from normal solver GS vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -250,46 +268,50 @@ namespace ReSolve vec_aux_->copyDataFrom(&h_H_[i * 
(restart_ + 1)], memory::HOST, memspace_); // V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i); - vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_ ); + vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; vector_handler_->scal(&t, &vec_v, memspace_); mem_.deviceSynchronize(); vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); - if (i != 0) { - for (int k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; + if (i != 0) + { + for (int k = 1; k <= i; k++) + { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; } } // if (i != 0) - real_type Hii = h_H_[i * (restart_ + 1) + i]; + real_type Hii = h_H_[i * (restart_ + 1) + i]; real_type Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); + real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); - if(std::abs(gam - ZERO) <= MACHINE_EPSILON) { + if (std::abs(gam - ZERO) <= MACHINE_EPSILON) + { gam = MACHINE_EPSILON; } /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; // residual norm estimate rnorm = std::abs(h_rs_[i + 1]); - io::Logger::misc() << "it: "<< it << " --> norm of the residual " + io::Logger::misc() << "it: " << it << " --> norm of the residual " << 
std::scientific << std::setprecision(16) << rnorm << "\n"; // check convergence - if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) + { notconv = 0; } } // inner while @@ -299,26 +321,33 @@ namespace ReSolve << rnorm << "\n"; // solve tri system h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for (int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; + for (int ii = 2; ii <= i + 1; ii++) + { + k = i - ii + 1; k1 = k + 1; - t = h_rs_[k]; - for(j = k1; j <= i; j++) { + t = h_rs_[k]; + for (j = k1; j <= i; j++) + { t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; } h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; } // get solution - if (flexible_) { - for (j = 0; j <= i; j++) { + if (flexible_) + { + for (j = 0; j <= i; j++) + { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); } - } else { + } + else + { vec_Z_->setToZero(0, memspace_); - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); - for(j = 0; j <= i; j++) { + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); + for (j = 0; j <= i; j++) + { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); } @@ -331,23 +360,27 @@ namespace ReSolve } /* test solution */ - if(rnorm <= tolrel || it >= maxit_) { + if (rnorm <= tolrel || it >= maxit_) + { outer_flag = 0; } rhs->copyDataTo(vec_V_->getData(memspace_), 0, memspace_); matrix_handler_->matvec(A_, x, vec_V_, &MINUS_ONE, &ONE, memspace_); - if (outer_flag) { + if (outer_flag) + { sketching_handler_->reset(); - if (sketching_method_ == cs) { + if (sketching_method_ == cs) + { vec_S_->setToZero(memspace_); } vec_v.setData(vec_V_->getData(0, memspace_), memspace_); vec_s.setData(vec_S_->getData(0, memspace_), memspace_); sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { 
vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -356,7 +389,8 @@ namespace ReSolve rnorm = std::sqrt(rnorm); } - if (!outer_flag) { + if (!outer_flag) + { rnorm = vector_handler_->dot(vec_V_, vec_V_, memspace_); // rnorm = ||V_0|| rnorm = std::sqrt(rnorm); @@ -366,30 +400,32 @@ namespace ReSolve << rnorm << "\n"; final_residual_norm_ = rnorm; - total_iters_ = it; + total_iters_ = it; } } // outer while return 0; } - int LinSolverIterativeRandFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) + int LinSolverIterativeRandFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) { - if (type != "LU") { + if (type != "LU") + { out::warning() << "Only cusolverRf tri solve can be used as a preconditioner at this time." << std::endl; return 1; - } else { + } + else + { LU_solver_ = LU_solver; return 0; } - } - index_type LinSolverIterativeRandFGMRES::getKrand() + index_type LinSolverIterativeRandFGMRES::getKrand() { return k_rand_; } - int LinSolverIterativeRandFGMRES::resetMatrix(matrix::Sparse* new_matrix) + int LinSolverIterativeRandFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; matrix_handler_->setValuesChanged(true, memspace_); @@ -404,8 +440,10 @@ namespace ReSolve */ int LinSolverIterativeRandFGMRES::setSketchingMethod(SketchingMethod method) { - if (is_sketching_set_) { - if (method == sketching_method_) { + if (is_sketching_set_) + { + if (method == sketching_method_) + { out::misc() << "Keeping sketching method " << method << "\n"; return 0; } @@ -415,7 +453,8 @@ namespace ReSolve // If solver is set, go ahead and create sketching, otherwise just set sketching method. 
sketching_method_ = method; - if (is_solver_set_) { + if (is_solver_set_) + { out::misc() << "Allocating sketching method " << sketching_method_ << "\n"; allocateSketchingData(); } @@ -448,7 +487,8 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setRestart(index_type restart) { // If the new restart value is the same as the old, do nothing. - if (restart_ == restart) { + if (restart_ == restart) + { return 0; } @@ -456,13 +496,15 @@ namespace ReSolve restart_ = restart; // If solver is already set, reallocate solver data - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); allocateSolverData(); } // If sketching has been set, reallocate sketching data - if (is_sketching_set_) { + if (is_sketching_set_) + { freeSketchingData(); allocateSketchingData(); } @@ -484,11 +526,15 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setFlexible(bool is_flexible) { // TODO: Add vector method resize - if (vec_Z_) { + if (vec_Z_) + { delete vec_Z_; - if (is_flexible) { + if (is_flexible) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just one vector, not a multivector and we don't keep it vec_Z_ = new vector_type(n_); } @@ -511,17 +557,17 @@ namespace ReSolve return 0; } - index_type LinSolverIterativeRandFGMRES::getRestart() const + index_type LinSolverIterativeRandFGMRES::getRestart() const { return restart_; } - index_type LinSolverIterativeRandFGMRES::getConvCond() const + index_type LinSolverIterativeRandFGMRES::getConvCond() const { return conv_cond_; } - bool LinSolverIterativeRandFGMRES::getFlexible() const + bool LinSolverIterativeRandFGMRES::getFlexible() const { return flexible_; } @@ -530,23 +576,23 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - setTol(atof(value.c_str())); - break; - case MAXIT: - setMaxit(atoi(value.c_str())); - break; - case RESTART: - setRestart(atoi(value.c_str())); - break; - case CONV_COND: - setConvergenceCondition(atoi(value.c_str())); - break; - case 
FLEXIBLE: - setFlexible(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case TOL: + setTol(atof(value.c_str())); + break; + case MAXIT: + setMaxit(atoi(value.c_str())); + break; + case RESTART: + setRestart(atoi(value.c_str())); + break; + case CONV_COND: + setConvergenceCondition(atoi(value.c_str())); + break; + case FLEXIBLE: + setFlexible(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -555,8 +601,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -565,17 +611,17 @@ namespace ReSolve { switch (getParamId(id)) { - case MAXIT: - return getMaxit(); - break; - case RESTART: - return getRestart(); - break; - case CONV_COND: - return getConvCond(); - break; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case MAXIT: + return getMaxit(); + break; + case RESTART: + return getRestart(); + break; + case CONV_COND: + return getConvCond(); + break; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -584,11 +630,11 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - return getTol(); - break; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case TOL: + return getTol(); + break; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -597,11 +643,11 @@ namespace ReSolve { switch (getParamId(id)) { - case FLEXIBLE: - return getFlexible(); - break; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case FLEXIBLE: + return getFlexible(); + break; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ 
-634,9 +680,12 @@ namespace ReSolve { vec_V_ = new vector_type(n_, restart_ + 1); vec_V_->allocate(memspace_); - if (flexible_) { + if (flexible_) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } @@ -645,9 +694,9 @@ namespace ReSolve vec_aux_->allocate(memspace_); h_H_ = new real_type[restart_ * (restart_ + 1)]; - h_c_ = new real_type[restart_]; // needed for givens - h_s_ = new real_type[restart_]; // same - h_rs_ = new real_type[restart_ + 1]; // for residual norm history + h_c_ = new real_type[restart_]; // needed for givens + h_s_ = new real_type[restart_]; // same + h_rs_ = new real_type[restart_ + 1]; // for residual norm history is_solver_set_ = true; return 0; @@ -655,18 +704,18 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::freeSolverData() { - delete [] h_H_ ; - delete [] h_c_ ; - delete [] h_s_ ; - delete [] h_rs_; + delete[] h_H_; + delete[] h_c_; + delete[] h_s_; + delete[] h_rs_; delete vec_V_; delete vec_Z_; delete vec_aux_; - h_H_ = nullptr; - h_c_ = nullptr; - h_s_ = nullptr; - h_rs_ = nullptr; + h_H_ = nullptr; + h_c_ = nullptr; + h_s_ = nullptr; + h_rs_ = nullptr; vec_V_ = nullptr; vec_Z_ = nullptr; vec_aux_ = nullptr; @@ -684,34 +733,39 @@ namespace ReSolve { // Set randomized method k_rand_ = n_; - switch (sketching_method_) { - case cs: - if (std::ceil(restart_ * std::log(n_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(restart_ * std::log(static_cast(n_)))); - } - sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); - // set k and n - break; - case fwht: - if (std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_))); - } - sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); - break; - default: - io::Logger::warning() << "Wrong sketching method, setting to 
default (CountSketch)\n"; - sketching_method_ = cs; - if (std::ceil(restart_ * std::log(n_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(restart_ * std::log(n_))); - } - sketching_handler_ = new SketchingHandler(cs, device_type_); - break; + switch (sketching_method_) + { + case cs: + if (std::ceil(restart_ * std::log(n_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(restart_ * std::log(static_cast(n_)))); + } + sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); + // set k and n + break; + case fwht: + if (std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_))); + } + sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); + break; + default: + io::Logger::warning() << "Wrong sketching method, setting to default (CountSketch)\n"; + sketching_method_ = cs; + if (std::ceil(restart_ * std::log(n_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(restart_ * std::log(n_))); + } + sketching_handler_ = new SketchingHandler(cs, device_type_); + break; } one_over_k_ = 1.0 / std::sqrt((real_type) k_rand_); - vec_S_ = new vector_type(k_rand_, restart_ + 1); + vec_S_ = new vector_type(k_rand_, restart_ + 1); vec_S_->allocate(memspace_); - if (sketching_method_ == cs) { + if (sketching_method_ == cs) + { vec_S_->setToZero(memspace_); } @@ -725,7 +779,7 @@ namespace ReSolve delete vec_S_; delete sketching_handler_; - vec_S_ = nullptr; + vec_S_ = nullptr; sketching_handler_ = nullptr; is_sketching_set_ = false; @@ -737,7 +791,6 @@ namespace ReSolve LU_solver_->solve(rhs, x); } - /** * @brief Set memory space and device tape based on how MatrixHandler * and VectorHandler are configured. 
@@ -750,19 +803,24 @@ namespace ReSolve bool is_vector_handler_cuda = matrix_handler_->getIsCudaEnabled(); bool is_vector_handler_hip = matrix_handler_->getIsHipEnabled(); - if ((is_matrix_handler_cuda != is_vector_handler_cuda) || - (is_matrix_handler_hip != is_vector_handler_hip )) { + if ((is_matrix_handler_cuda != is_vector_handler_cuda) || (is_matrix_handler_hip != is_vector_handler_hip)) + { out::error() << "Matrix and vector handler backends are incompatible!\n"; } - if (is_matrix_handler_cuda) { - memspace_ = memory::DEVICE; + if (is_matrix_handler_cuda) + { + memspace_ = memory::DEVICE; device_type_ = memory::CUDADEVICE; - } else if (is_matrix_handler_hip) { - memspace_ = memory::DEVICE; + } + else if (is_matrix_handler_hip) + { + memspace_ = memory::DEVICE; device_type_ = memory::HIPDEVICE; - } else { - memspace_ = memory::HOST; + } + else + { + memspace_ = memory::HOST; device_type_ = memory::NONE; } } diff --git a/resolve/LinSolverIterativeRandFGMRES.hpp b/resolve/LinSolverIterativeRandFGMRES.hpp index 5704697d7..80a6d0b61 100644 --- a/resolve/LinSolverIterativeRandFGMRES.hpp +++ b/resolve/LinSolverIterativeRandFGMRES.hpp @@ -2,23 +2,26 @@ * @file LinSolverIterativeRandFGMRES.hpp * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Declaration of LinSolverIterativeRandFGMRES class - * + * */ #pragma once #include "Common.hpp" #include #include +#include namespace ReSolve { // Forward declarations class SketchingHandler; class GramSchmidt; + namespace matrix { class Sparse; } + namespace vector { class Vector; @@ -26,101 +29,111 @@ namespace ReSolve /** * @brief Randomized (F)GMRES - * + * * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) - * + * * @note Pointers to MatrixHandler and VectorHandler objects are inherited from * LinSolver base class. 
- * + * */ class LinSolverIterativeRandFGMRES : public LinSolverIterative { - private: - using vector_type = vector::Vector; - - public: - enum SketchingMethod {cs = 0, // count sketch - fwht}; // fast Walsh-Hadamard transform - - LinSolverIterativeRandFGMRES(MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs); - - LinSolverIterativeRandFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs); - - ~LinSolverIterativeRandFGMRES(); - - int solve(vector_type* rhs, vector_type* x) override; - int setup(matrix::Sparse* A) override; - int resetMatrix(matrix::Sparse* new_A) override; - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; - int setOrthogonalization(GramSchmidt* gs) override; - - int setRestart(index_type restart); - int setFlexible(bool is_flexible); - int setConvergenceCondition(index_type conv_cond); - index_type getRestart() const; - index_type getConvCond() const; - bool getFlexible() const; - - index_type getKrand(); - int setSketchingMethod(SketchingMethod method); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {TOL=0, MAXIT, RESTART, CONV_COND, FLEXIBLE}; - - index_type restart_{10}; ///< GMRES restart - index_type conv_cond_{0}; ///< GMRES convergence condition - bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm - - private: - int allocateSolverData(); - int freeSolverData(); - int allocateSketchingData(); - int 
freeSketchingData(); - void setMemorySpace(); - void initParamList(); - void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner - - memory::MemorySpace memspace_; - - vector_type* vec_V_{nullptr}; - vector_type* vec_Z_{nullptr}; - // for performing Gram-Schmidt - vector_type* vec_S_{nullptr}; ///< this is where sketched vectors are stored - - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; - vector_type* vec_aux_{nullptr}; - - GramSchmidt* GS_{nullptr}; - LinSolverDirect* LU_solver_{nullptr}; - index_type n_{0}; - real_type one_over_k_{1.0}; - - index_type k_rand_{0}; ///< size of sketch space. We need to know it so we can allocate S! - MemoryHandler mem_; ///< Device memory manager object - SketchingHandler* sketching_handler_{nullptr}; - SketchingMethod sketching_method_; - memory::DeviceType device_type_{memory::NONE}; - bool is_solver_set_{false}; - bool is_sketching_set_{false}; + private: + using vector_type = vector::Vector; + + public: + enum SketchingMethod + { + cs = 0, // count sketch + fwht + }; // fast Walsh-Hadamard transform + + LinSolverIterativeRandFGMRES(MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs); + + LinSolverIterativeRandFGMRES(index_type restart, + real_type tol, + index_type maxit, + index_type conv_cond, + MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs); + + ~LinSolverIterativeRandFGMRES(); + + int solve(vector_type* rhs, vector_type* x) override; + int setup(matrix::Sparse* A) override; + int resetMatrix(matrix::Sparse* new_A) override; + int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; + int setOrthogonalization(GramSchmidt* gs) override; + + int setRestart(index_type restart); + int setFlexible(bool is_flexible); + int setConvergenceCondition(index_type conv_cond); + index_type getRestart() const; 
+ index_type getConvCond() const; + bool getFlexible() const; + + index_type getKrand(); + int setSketchingMethod(SketchingMethod method); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + TOL = 0, + MAXIT, + RESTART, + CONV_COND, + FLEXIBLE + }; + + index_type restart_{10}; ///< GMRES restart + index_type conv_cond_{0}; ///< GMRES convergence condition + bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm + + private: + int allocateSolverData(); + int freeSolverData(); + int allocateSketchingData(); + int freeSketchingData(); + void setMemorySpace(); + void initParamList(); + void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner + + memory::MemorySpace memspace_; + + vector_type* vec_V_{nullptr}; + vector_type* vec_Z_{nullptr}; + // for performing Gram-Schmidt + vector_type* vec_S_{nullptr}; ///< this is where sketched vectors are stored + + real_type* h_H_{nullptr}; + real_type* h_c_{nullptr}; + real_type* h_s_{nullptr}; + real_type* h_rs_{nullptr}; + vector_type* vec_aux_{nullptr}; + + GramSchmidt* GS_{nullptr}; + LinSolverDirect* LU_solver_{nullptr}; + index_type n_{0}; + real_type one_over_k_{1.0}; + + index_type k_rand_{0}; ///< size of sketch space. We need to know it so we can allocate S! 
+ MemoryHandler mem_; ///< Device memory manager object + SketchingHandler* sketching_handler_{nullptr}; + SketchingMethod sketching_method_; + memory::DeviceType device_type_{memory::NONE}; + bool is_solver_set_{false}; + bool is_sketching_set_{false}; }; } // namespace ReSolve diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 38f294a7b..702d80ea6 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -1,108 +1,127 @@ #pragma once -#include #include // <- declares `memcpy` +#include + namespace ReSolve { namespace memory { - enum MemorySpace{HOST = 0, DEVICE}; - enum MemoryDirection{HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE}; - enum DeviceType{NONE = 0, CUDADEVICE, HIPDEVICE}; - } -} + enum MemorySpace + { + HOST = 0, + DEVICE + }; + + enum MemoryDirection + { + HOST_TO_HOST = 0, + HOST_TO_DEVICE, + DEVICE_TO_HOST, + DEVICE_TO_DEVICE + }; + + enum DeviceType + { + NONE = 0, + CUDADEVICE, + HIPDEVICE + }; + } // namespace memory +} // namespace ReSolve namespace ReSolve { /** * @class MemoryUtils - * + * * @brief Provides basic memory allocation, free and copy functions. - * + * * This class provedes abstractions for memory management functiosn for * different GPU programming models. 
- * + * * @tparam Policy - Memory management policy (vendor specific) - * + * * @author Slaven Peles */ template class MemoryUtils { - public: - MemoryUtils() = default; - ~MemoryUtils() = default; - - void deviceSynchronize(); - int getLastDeviceError(); - int deleteOnDevice(void* v); - - template - int allocateArrayOnDevice(T** v, I n); - - template - int allocateBufferOnDevice(T** v, I n); - - template - int setZeroArrayOnDevice(T* v, I n); - - template - int setArrayToConstOnDevice(T* v, T c, I n); - - template - int copyArrayDeviceToHost(T* dst, const T* src, I n); - - template - int copyArrayDeviceToDevice(T* dst, const T* src, I n); - - template - int copyArrayHostToDevice(T* dst, const T* src, I n); - - /// - /// Methods implemented here are always needed - /// - - template - int allocateArrayOnHost(T** v, I n) + public: + MemoryUtils() = default; + ~MemoryUtils() = default; + + void deviceSynchronize(); + int getLastDeviceError(); + int deleteOnDevice(void* v); + + template + int allocateArrayOnDevice(T** v, I n); + + template + int allocateBufferOnDevice(T** v, I n); + + template + int setZeroArrayOnDevice(T* v, I n); + + template + int setArrayToConstOnDevice(T* v, T c, I n); + + template + int copyArrayDeviceToHost(T* dst, const T* src, I n); + + template + int copyArrayDeviceToDevice(T* dst, const T* src, I n); + + template + int copyArrayHostToDevice(T* dst, const T* src, I n); + + /// + /// Methods implemented here are always needed + /// + + template + int allocateArrayOnHost(T** v, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + *v = new T[arraysize]; + return *v == nullptr ? 
1 : 0; + } + + template + int deleteOnHost(T* v) + { + delete[] v; + v = nullptr; + return 0; + } + + template + int copyArrayHostToHost(T* dst, const T* src, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + memcpy(dst, src, arraysize); + return 0; + } + + template + int setZeroArrayOnHost(T* v, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + memset(v, 0, arraysize); + return 0; + } + + template + int setArrayToConstOnHost(T* v, T c, I n) + { + for (I i = 0; i < n; ++i) { - std::size_t arraysize = static_cast(n) * sizeof(T); - *v = new T[arraysize]; - return *v == nullptr ? 1 : 0; - } - - template - int deleteOnHost(T* v) - { - delete [] v; - v = nullptr; - return 0; - } - - template - int copyArrayHostToHost(T* dst, const T* src, I n) - { - std::size_t arraysize = static_cast(n) * sizeof(T); - memcpy(dst, src, arraysize); - return 0; - } - - template - int setZeroArrayOnHost(T* v, I n) - { - std::size_t arraysize = static_cast(n) * sizeof(T); - memset(v, 0, arraysize); - return 0; - } - - template - int setArrayToConstOnHost(T* v, T c, I n) - { - for (I i = 0; i < n; ++i) { - v[i] = c; - } - return 0; + v[i] = c; } + return 0; + } }; } // namespace ReSolve @@ -127,4 +146,3 @@ using MemoryHandler = ReSolve::MemoryUtils; using MemoryHandler = ReSolve::MemoryUtils; #endif - diff --git a/resolve/MemoryUtils.tpp b/resolve/MemoryUtils.tpp index 7dc2dc125..7df43f6d4 100644 --- a/resolve/MemoryUtils.tpp +++ b/resolve/MemoryUtils.tpp @@ -79,4 +79,4 @@ namespace ReSolve return Policy::template copyArrayHostToDevice(dst, src, n); } -} // namespace ReSolve \ No newline at end of file +} // namespace ReSolve diff --git a/resolve/Profiling.hpp b/resolve/Profiling.hpp index cb6509dcc..6765d5ba9 100644 --- a/resolve/Profiling.hpp +++ b/resolve/Profiling.hpp @@ -6,15 +6,17 @@ #ifdef RESOLVE_USE_HIP #include #define RESOLVE_RANGE_PUSH(x) roctxRangePush(x) -#define RESOLVE_RANGE_POP(x) roctxRangePop(); \ - roctxMarkA(x) +#define RESOLVE_RANGE_POP(x) 
\ + roctxRangePop(); \ + roctxMarkA(x) #endif // RESOLVE_USE_HIP #ifdef RESOLVE_USE_CUDA #include #define RESOLVE_RANGE_PUSH(x) nvtxRangePush(x) -#define RESOLVE_RANGE_POP(x) nvtxRangePop(); \ - nvtxMarkA(x) +#define RESOLVE_RANGE_POP(x) \ + nvtxRangePop(); \ + nvtxMarkA(x) #endif // RESOLVE_USE_CUDA #else diff --git a/resolve/RefactorizationSolver.cpp b/resolve/RefactorizationSolver.cpp index 45311f6f3..0b1fab8f5 100644 --- a/resolve/RefactorizationSolver.cpp +++ b/resolve/RefactorizationSolver.cpp @@ -1,8 +1,8 @@ #include "RefactorizationSolver.hpp" -namespace ReSolve +namespace ReSolve { RefactorizationSolver::RefactorizationSolver() { } -} +} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.hpp b/resolve/RefactorizationSolver.hpp index 5beeb8d62..a3fb1b79d 100644 --- a/resolve/RefactorizationSolver.hpp +++ b/resolve/RefactorizationSolver.hpp @@ -7,30 +7,30 @@ namespace ReSolve RefactorizationSolver { using vector_type = vector::Vector; - - public: - RefactorizationSolver(); - ~RefactorizationSolver(); - int setup(std::string first_solver, - std::string refact_solver_, - std::string use_ir_); - int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); - - int solve(matrix::Sparse* A, vector_type* vec_rhs, vector_type* vec_x); - - private: - std::string first_solver_name_; - std::string refact_solver_name_; - std::string use_ir_; - //IR parameters - real_type ir_tol_; - index_type ir_maxit_; - index_type ir_gs_; + public: + RefactorizationSolver(); + ~RefactorizationSolver(); + int setup(std::string first_solver, + std::string refact_solver_, + std::string use_ir_); - LinSolverDirect* first_solver_; - LinSolverDirect* refact_solver_; - LinSolverIterative* ir_solver_; - bool factorization_exists_; + int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); + + int solve(matrix::Sparse * A, vector_type * vec_rhs, vector_type * vec_x); + + private: + std::string first_solver_name_; + std::string refact_solver_name_; + 
std::string use_ir_; + // IR parameters + real_type ir_tol_; + index_type ir_maxit_; + index_type ir_gs_; + + LinSolverDirect* first_solver_; + LinSolverDirect* refact_solver_; + LinSolverIterative* ir_solver_; + bool factorization_exists_; }; -} +} // namespace ReSolve diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index f3fead184..2b54921a3 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -1,13 +1,13 @@ #include #include -#include +#include +#include +#include +#include #include +#include #include -#include -#include -#include -#include #include #ifdef RESOLVE_USE_KLU @@ -17,16 +17,16 @@ #include #ifdef RESOLVE_USE_CUDA -#include #include #include #include +#include #endif #ifdef RESOLVE_USE_HIP -#include #include #include +#include #endif // Handlers @@ -34,10 +34,8 @@ #include // Utilities -#include - #include "SystemSolver.hpp" - +#include namespace ReSolve { @@ -45,11 +43,11 @@ namespace ReSolve using out = io::Logger; SystemSolver::SystemSolver(LinAlgWorkspaceCpu* workspaceCpu, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + std::string factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceCpu_(workspaceCpu), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -57,7 +55,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -75,12 +74,12 @@ namespace ReSolve } #ifdef RESOLVE_USE_CUDA - SystemSolver::SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + SystemSolver::SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string 
factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceCuda_(workspaceCuda), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -88,7 +87,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -107,12 +107,12 @@ namespace ReSolve #endif #ifdef RESOLVE_USE_HIP - SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceHip_(workspaceHip), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -120,7 +120,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -142,27 +143,33 @@ namespace ReSolve { delete resVector_; - if (factorizationMethod_ != "none") { + if (factorizationMethod_ != "none") + { delete factorizationSolver_; } - if (refactorizationMethod_ != "none") { + if (refactorizationMethod_ != "none") + { delete refactorizationSolver_; } - if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") { + if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") + { delete iterativeSolver_; } - if (gs_ != nullptr) { + if (gs_ != nullptr) + { delete gs_; } - if (irMethod_ != "none") { + if (irMethod_ != "none") + { delete iterativeSolver_; } - if (precondition_method_ != 
"none") { + if (precondition_method_ != "none") + { delete preconditioner_; } @@ -173,23 +180,31 @@ namespace ReSolve int SystemSolver::setMatrix(matrix::Sparse* A) { int status = 0; - A_ = A; + A_ = A; resVector_ = new vector_type(A->getNumRows()); - if (memspace_ == "cpu") { + if (memspace_ == "cpu") + { resVector_->allocate(memory::HOST); - } else { + } + else + { resVector_->allocate(memory::DEVICE); matrixHandler_->setValuesChanged(true, memory::DEVICE); } // If we use iterative solver, we can set it up here - if (solveMethod_ == "randgmres") { - auto* rgmres = dynamic_cast(iterativeSolver_); - status += rgmres->setup(A_); - } else if (solveMethod_ == "fgmres") { - auto* fgmres = dynamic_cast(iterativeSolver_); - status += fgmres->setup(A_); - } else { + if (solveMethod_ == "randgmres") + { + auto* rgmres = dynamic_cast(iterativeSolver_); + status += rgmres->setup(A_); + } + else if (solveMethod_ == "fgmres") + { + auto* fgmres = dynamic_cast(iterativeSolver_); + status += fgmres->setup(A_); + } + else + { // do nothing } @@ -205,62 +220,84 @@ namespace ReSolve int SystemSolver::initialize() { // First delete old objects - if (factorizationSolver_) { + if (factorizationSolver_) + { delete factorizationSolver_; factorizationSolver_ = nullptr; } - if (refactorizationSolver_) { + if (refactorizationSolver_) + { delete refactorizationSolver_; refactorizationSolver_ = nullptr; } - if (preconditioner_) { + if (preconditioner_) + { delete preconditioner_; preconditioner_ = nullptr; } - if (iterativeSolver_) { + if (iterativeSolver_) + { delete iterativeSolver_; iterativeSolver_ = nullptr; } - if (gs_) { + if (gs_) + { delete gs_; gs_ = nullptr; } // Create factorization solver - if (factorizationMethod_ == "none") { + if (factorizationMethod_ == "none") + { // do nothing #ifdef RESOLVE_USE_KLU - } else if (factorizationMethod_ == "klu") { + } + else if (factorizationMethod_ == "klu") + { factorizationSolver_ = new ReSolve::LinSolverDirectKLU(); #endif - } else { + } 
+ else + { out::error() << "Unrecognized factorization " << factorizationMethod_ << "\n"; return 1; } // Create refactorization solver - if (refactorizationMethod_ == "none") { + if (refactorizationMethod_ == "none") + { // do nothing - } else if (refactorizationMethod_ == "klu") { + } + else if (refactorizationMethod_ == "klu") + { // do nothing for now, KLU is the only factorization solver available #ifdef RESOLVE_USE_CUDA - } else if (refactorizationMethod_ == "glu") { + } + else if (refactorizationMethod_ == "glu") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverGLU(workspaceCuda_); - } else if (refactorizationMethod_ == "cusolverrf") { + } + else if (refactorizationMethod_ == "cusolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverRf(); #endif #ifdef RESOLVE_USE_HIP - } else if (refactorizationMethod_ == "rocsolverrf") { + } + else if (refactorizationMethod_ == "rocsolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectRocSolverRf(workspaceHip_); #endif - } else { + } + else + { out::error() << "Refactorization method " << refactorizationMethod_ << " not recognized ...\n"; return 1; } // Create iterative refinement - if (irMethod_ == "fgmres") { + if (irMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, @@ -268,39 +305,57 @@ namespace ReSolve } // Create preconditioner - if (precondition_method_ == "none") { + if (precondition_method_ == "none") + { // do nothing - } else if (precondition_method_ == "ilu0") { - if (memspace_ == "cpu") { + } + else if (precondition_method_ == "ilu0") + { + if (memspace_ == "cpu") + { // preconditioner_ = new LinSolverDirectSerialILU0(workspaceCpu_); preconditioner_ = new LinSolverDirectCpuILU0(workspaceCpu_); #ifdef RESOLVE_USE_CUDA - } else if (memspace_ == "cuda") { + } + else if (memspace_ == "cuda") + { preconditioner_ = new LinSolverDirectCuSparseILU0(workspaceCuda_); #endif #ifdef 
RESOLVE_USE_HIP - } else if (memspace_ == "hip") { + } + else if (memspace_ == "hip") + { preconditioner_ = new LinSolverDirectRocSparseILU0(workspaceHip_); #endif - } else { + } + else + { out::error() << "Memory space " << memspace_ - << " not recognized ...\n"; + << " not recognized ...\n"; return 1; } - } else { + } + else + { out::error() << "Preconditioner method " << precondition_method_ << " not recognized ...\n"; return 1; } // Create iterative solver - if (solveMethod_ == "randgmres") { + if (solveMethod_ == "randgmres") + { LinSolverIterativeRandFGMRES::SketchingMethod sketch; - if (sketching_method_ == "count") { + if (sketching_method_ == "count") + { sketch = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method_ == "fwht") { + } + else if (sketching_method_ == "fwht") + { sketch = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method_ << " not recognized!\n" << "Using default.\n"; sketch = LinSolverIterativeRandFGMRES::cs; @@ -310,12 +365,16 @@ namespace ReSolve vectorHandler_, sketch, gs_); - } else if (solveMethod_ == "fgmres") { + } + else if (solveMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - } else { + } + else + { // do nothing } @@ -324,12 +383,14 @@ namespace ReSolve int SystemSolver::analyze() { - if (A_ == nullptr) { + if (A_ == nullptr) + { out::error() << "System matrix not set!\n"; return 1; } - if (factorizationMethod_ == "klu") { + if (factorizationMethod_ == "klu") + { factorizationSolver_->setup(A_); return factorizationSolver_->analyze(); } @@ -338,7 +399,8 @@ namespace ReSolve int SystemSolver::factorize() { - if (factorizationMethod_ == "klu") { + if (factorizationMethod_ == "klu") + { is_solve_on_device_ = false; return factorizationSolver_->factorize(); } @@ -347,15 +409,15 @@ namespace ReSolve int SystemSolver::refactorize() { - if (refactorizationMethod_ 
== "klu") { + if (refactorizationMethod_ == "klu") + { return factorizationSolver_->refactorize(); } - if (refactorizationMethod_ == "glu" || - refactorizationMethod_ == "cusolverrf" || - refactorizationMethod_ == "rocsolverrf") { - is_solve_on_device_ = true; - return refactorizationSolver_->refactorize(); + if (refactorizationMethod_ == "glu" || refactorizationMethod_ == "cusolverrf" || refactorizationMethod_ == "rocsolverrf") + { + is_solve_on_device_ = true; + return refactorizationSolver_->refactorize(); } return 1; @@ -381,23 +443,27 @@ namespace ReSolve int SystemSolver::refactorizationSetup() { int status = 0; + // Get factors and permutation vectors L_ = factorizationSolver_->getLFactor(); U_ = factorizationSolver_->getUFactor(); P_ = factorizationSolver_->getPOrdering(); Q_ = factorizationSolver_->getQOrdering(); - if (L_ == nullptr) { + if (L_ == nullptr) + { out::error() << "Factorization failed, cannot extract factors ...\n"; status += 1; } #ifdef RESOLVE_USE_CUDA - if (refactorizationMethod_ == "glu") { - is_solve_on_device_ = true; - status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); + if (refactorizationMethod_ == "glu") + { + is_solve_on_device_ = true; + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); } - if (refactorizationMethod_ == "cusolverrf") { + if (refactorizationMethod_ == "cusolverrf") + { status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); LinSolverDirectCuSolverRf* Rf = dynamic_cast(refactorizationSolver_); @@ -408,15 +474,17 @@ namespace ReSolve #endif #ifdef RESOLVE_USE_HIP - if (refactorizationMethod_ == "rocsolverrf") { + if (refactorizationMethod_ == "rocsolverrf") + { is_solve_on_device_ = false; - auto* Rf = dynamic_cast(refactorizationSolver_); + auto* Rf = dynamic_cast(refactorizationSolver_); Rf->setSolveMode(1); status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_, resVector_); } #endif - if (irMethod_ == "fgmres") { + if (irMethod_ == "fgmres") + { status += iterativeSolver_->setup(A_); 
status += iterativeSolver_->setupPreconditioner("LU", refactorizationSolver_); } @@ -442,26 +510,34 @@ namespace ReSolve int status = 0; // Use Krylov solver if selected - if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") { + if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") + { status += iterativeSolver_->resetMatrix(A_); status += iterativeSolver_->solve(rhs, x); return status; } - if (solveMethod_ == "klu") { + if (solveMethod_ == "klu") + { status += factorizationSolver_->solve(rhs, x); } - if (solveMethod_ == "glu" || solveMethod_ == "cusolverrf" || solveMethod_ == "rocsolverrf") { - if (is_solve_on_device_) { + if (solveMethod_ == "glu" || solveMethod_ == "cusolverrf" || solveMethod_ == "rocsolverrf") + { + if (is_solve_on_device_) + { status += refactorizationSolver_->solve(rhs, x); - } else { + } + else + { status += factorizationSolver_->solve(rhs, x); } } - if (irMethod_ == "fgmres") { - if (is_solve_on_device_) { + if (irMethod_ == "fgmres") + { + if (is_solve_on_device_) + { status += refine(rhs, x); } } @@ -471,9 +547,11 @@ namespace ReSolve int SystemSolver::preconditionerSetup() { int status = 0; - if (precondition_method_ == "ilu0") { + if (precondition_method_ == "ilu0") + { status += preconditioner_->setup(A_); - if (memspace_ != "cpu") { + if (memspace_ != "cpu") + { is_solve_on_device_ = true; } iterativeSolver_->setupPreconditioner("LU", preconditioner_); @@ -525,25 +603,35 @@ namespace ReSolve void SystemSolver::setRefactorizationMethod(std::string method) { refactorizationMethod_ = method; - if (refactorizationSolver_) { + if (refactorizationSolver_) + { delete refactorizationSolver_; refactorizationSolver_ = nullptr; } // Create refactorization solver - if (refactorizationMethod_ == "klu") { + if (refactorizationMethod_ == "klu") + { // do nothing for now #ifdef RESOLVE_USE_CUDA - } else if (refactorizationMethod_ == "glu") { + } + else if (refactorizationMethod_ == "glu") + { refactorizationSolver_ = new 
ReSolve::LinSolverDirectCuSolverGLU(workspaceCuda_); - } else if (refactorizationMethod_ == "cusolverrf") { + } + else if (refactorizationMethod_ == "cusolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverRf(); #endif #ifdef RESOLVE_USE_HIP - } else if (refactorizationMethod_ == "rocsolverrf") { + } + else if (refactorizationMethod_ == "rocsolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectRocSolverRf(workspaceHip_); #endif - } else { + } + else + { out::error() << "Refactorization method " << refactorizationMethod_ << " not recognized ...\n"; } @@ -564,13 +652,19 @@ namespace ReSolve if (iterativeSolver_) delete iterativeSolver_; - if (method == "randgmres") { + if (method == "randgmres") + { LinSolverIterativeRandFGMRES::SketchingMethod sketch; - if (sketching_method_ == "count") { + if (sketching_method_ == "count") + { sketch = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method_ == "fwht") { + } + else if (sketching_method_ == "fwht") + { sketch = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method_ << " not recognized!\n" << "Using default.\n"; sketch = LinSolverIterativeRandFGMRES::cs; @@ -581,12 +675,16 @@ namespace ReSolve vectorHandler_, sketch, gs_); - } else if (solveMethod_ == "fgmres") { + } + else if (solveMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - } else { + } + else + { out::error() << "Solve method " << solveMethod_ << " not recognized ...\n"; return 1; @@ -613,7 +711,8 @@ namespace ReSolve if (method == "none") return; - if (memspace_ == "cpu") { + if (memspace_ == "cpu") + { method = "none"; out::warning() << "Iterative refinement not supported on CPU. 
" << "Turning off ...\n"; @@ -622,13 +721,16 @@ namespace ReSolve gsMethod_ = gsMethod; - if (method == "fgmres") { + if (method == "fgmres") + { setGramSchmidtMethod(gsMethod); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - irMethod_ = method; - } else { + irMethod_ = method; + } + else + { out::error() << "Iterative refinement method " << method << " not recognized.\n"; } } @@ -636,18 +738,26 @@ namespace ReSolve real_type SystemSolver::getVectorNorm(vector_type* rhs) { using namespace ReSolve::constants; - real_type norm_b = 0.0; - if (memspace_ == "cpu") { + real_type norm_b = 0.0; + if (memspace_ == "cpu") + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::HOST)); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::DEVICE)); - } else { + } + else + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::HOST)); } #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } @@ -658,54 +768,70 @@ namespace ReSolve { using namespace ReSolve::constants; assert(rhs->getSize() == resVector_->getSize()); - real_type norm_b = 0.0; - real_type resnorm = 0.0; - memory::MemorySpace ms = memory::HOST; - if (memspace_ == "cpu") { + real_type norm_b = 0.0; + real_type resnorm = 0.0; + memory::MemorySpace ms = memory::HOST; + if (memspace_ == "cpu") + { resVector_->copyDataFrom(rhs, memory::HOST, memory::HOST); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { 
resVector_->copyDataFrom(rhs, memory::DEVICE, memory::DEVICE); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); - } else { + } + else + { resVector_->copyDataFrom(rhs, memory::HOST, memory::DEVICE); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); // ms = memory::HOST; } ms = memory::DEVICE; #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } matrixHandler_->setValuesChanged(true, ms); matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUS_ONE, ms); resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, ms)); - return resnorm/norm_b; + return resnorm / norm_b; } real_type SystemSolver::getNormOfScaledResiduals(vector_type* rhs, vector_type* x) { using namespace ReSolve::constants; assert(rhs->getSize() == resVector_->getSize()); - real_type norm_x = 0.0; - real_type norm_A = 0.0; - real_type resnorm = 0.0; - memory::MemorySpace ms = memory::HOST; - if (memspace_ == "cpu") { + real_type norm_x = 0.0; + real_type norm_A = 0.0; + real_type resnorm = 0.0; + memory::MemorySpace ms = memory::HOST; + if (memspace_ == "cpu") + { resVector_->copyDataFrom(rhs, memory::HOST, memory::HOST); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { resVector_->copyDataFrom(rhs, memory::DEVICE, memory::DEVICE); - } else { + } + else + { resVector_->copyDataFrom(rhs, memory::HOST, memory::DEVICE); } ms = memory::DEVICE; #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } @@ -734,18 +860,24 @@ namespace ReSolve */ int SystemSolver::setSketchingMethod(std::string sketching_method) { - if (solveMethod_ != "randgmres") { + if (solveMethod_ != "randgmres") + { out::warning() << "Trying to set sketching method to an 
incompatible solver.\n"; out::warning() << "The setting will be ignored.\n"; return 1; } LinSolverIterativeRandFGMRES::SketchingMethod tmp; - if (sketching_method == "count") { + if (sketching_method == "count") + { tmp = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method == "fwht") { + } + else if (sketching_method == "fwht") + { tmp = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method << " not recognized!\n" << "Using default (count sketch).\n"; tmp = LinSolverIterativeRandFGMRES::cs; @@ -754,7 +886,8 @@ namespace ReSolve sketching_method_ = sketching_method; // At this point iterative solver, if created, can only be LinSolverIterativeRandFGMRES - if (iterativeSolver_) { + if (iterativeSolver_) + { // TODO: Use cast here as a temporary solution; will be replaced by parameter setting framework auto* sol = dynamic_cast(iterativeSolver_); sol->setSketchingMethod(tmp); @@ -771,25 +904,39 @@ namespace ReSolve { // Map string input to the Gram-Schmidt variant enum GramSchmidt::GSVariant gs_variant; - if (variant == "cgs2") { + if (variant == "cgs2") + { gs_variant = GramSchmidt::CGS2; - } else if (variant == "mgs") { + } + else if (variant == "mgs") + { gs_variant = GramSchmidt::MGS; - } else if (variant == "mgs_two_sync") { + } + else if (variant == "mgs_two_sync") + { gs_variant = GramSchmidt::MGS_TWO_SYNC; - } else if (variant == "mgs_pm") { + } + else if (variant == "mgs_pm") + { gs_variant = GramSchmidt::MGS_PM; - } else if (variant == "cgs1") { + } + else if (variant == "cgs1") + { gs_variant = GramSchmidt::CGS1; - } else { + } + else + { out::warning() << "Gram-Schmidt variant " << variant << " not recognized.\n"; out::warning() << "Using default CGS2 Gram-Schmidt variant.\n"; gs_variant = GramSchmidt::CGS2; } - if (gs_) { + if (gs_) + { gs_->setVariant(gs_variant); - } else { + } + else + { gs_ = new GramSchmidt(vectorHandler_, gs_variant); } diff --git a/resolve/SystemSolver.hpp 
b/resolve/SystemSolver.hpp index c204dd470..0aa5173c1 100644 --- a/resolve/SystemSolver.hpp +++ b/resolve/SystemSolver.hpp @@ -1,4 +1,4 @@ -//this is to solve the system, can call different linear solvers if necessary +// this is to solve the system, can call different linear solvers if necessary namespace ReSolve { class LinSolverDirectKLU; @@ -23,107 +23,106 @@ namespace ReSolve class SystemSolver { - public: - using vector_type = vector::Vector; - using matrix_type = matrix::Sparse; - - /// @brief Temporary until abstract preconditioner class is created - using precond_type = LinSolverDirect; - - SystemSolver(LinAlgWorkspaceCpu* workspaceCpu, - std::string factor = "klu", - std::string refactor = "klu", - std::string solve = "klu", - std::string precond = "none", - std::string ir = "none"); - SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, - std::string factor = "klu", - std::string refactor = "cusolverrf", - std::string solve = "cusolverrf", - std::string precond = "none", - std::string ir = "none"); - SystemSolver(LinAlgWorkspaceHIP* workspaceHip, - std::string factor = "klu", - std::string refactor = "rocsolverrf", - std::string solve = "rocsolverrf", - std::string precond = "none", - std::string ir = "none"); - - ~SystemSolver(); - - int initialize(); - int setMatrix(matrix::Sparse* A); - int analyze(); // symbolic part - int factorize(); // numeric part - int refactorize(); - int refactorizationSetup(); - int preconditionerSetup(); - int solve(vector_type* rhs, vector_type* x); // for direct and iterative - int refine(vector_type* rhs, vector_type* x); // for iterative refinement - - // we update the matrix once it changed - int updateMatrix(std::string format, int* ia, int* ja, double* a); - - LinSolverDirect& getFactorizationSolver(); - LinSolverDirect& getRefactorizationSolver(); - LinSolverIterative& getIterativeSolver(); - - real_type getVectorNorm(vector_type* rhs); - real_type getResidualNorm(vector_type* rhs, vector_type* x); - real_type 
getNormOfScaledResiduals(vector_type* rhs, vector_type* x); - - // Get solver parameters - const std::string getFactorizationMethod() const; - const std::string getRefactorizationMethod() const; - const std::string getSolveMethod() const; - const std::string getRefinementMethod() const; - const std::string getOrthogonalizationMethod() const; - - // Set solver parameters - void setFactorizationMethod(std::string method); - void setRefactorizationMethod(std::string method); - int setSolveMethod(std::string method); - void setRefinementMethod(std::string method, std::string gs = "cgs2"); - int setSketchingMethod(std::string method); - int setGramSchmidtMethod(std::string gs_method); - - private: - - LinSolverDirect* factorizationSolver_{nullptr}; - LinSolverDirect* refactorizationSolver_{nullptr}; - LinSolverIterative* iterativeSolver_{nullptr}; - GramSchmidt* gs_{nullptr}; - - precond_type* preconditioner_{nullptr}; - - LinAlgWorkspaceCUDA* workspaceCuda_{nullptr}; - LinAlgWorkspaceHIP* workspaceHip_{nullptr}; - LinAlgWorkspaceCpu* workspaceCpu_{nullptr}; - - MatrixHandler* matrixHandler_{nullptr}; - VectorHandler* vectorHandler_{nullptr}; - - bool is_solve_on_device_{false}; - - matrix_type* L_{nullptr}; - matrix_type* U_{nullptr}; - - index_type* P_{nullptr}; - index_type* Q_{nullptr}; - - vector_type* resVector_{nullptr}; - - matrix::Sparse* A_{nullptr}; - - // Configuration parameters - std::string factorizationMethod_{"none"}; - std::string refactorizationMethod_{"none"}; - std::string solveMethod_{"none"}; - std::string precondition_method_{"none"}; - std::string irMethod_{"none"}; - std::string gsMethod_{"cgs2"}; - std::string sketching_method_{"count"}; ///< @todo move this to LinSolverIterative class - - std::string memspace_; + public: + using vector_type = vector::Vector; + using matrix_type = matrix::Sparse; + + /// @brief Temporary until abstract preconditioner class is created + using precond_type = LinSolverDirect; + + SystemSolver(LinAlgWorkspaceCpu* 
workspaceCpu, + std::string factor = "klu", + std::string refactor = "klu", + std::string solve = "klu", + std::string precond = "none", + std::string ir = "none"); + SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string factor = "klu", + std::string refactor = "cusolverrf", + std::string solve = "cusolverrf", + std::string precond = "none", + std::string ir = "none"); + SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor = "klu", + std::string refactor = "rocsolverrf", + std::string solve = "rocsolverrf", + std::string precond = "none", + std::string ir = "none"); + + ~SystemSolver(); + + int initialize(); + int setMatrix(matrix::Sparse* A); + int analyze(); // symbolic part + int factorize(); // numeric part + int refactorize(); + int refactorizationSetup(); + int preconditionerSetup(); + int solve(vector_type* rhs, vector_type* x); // for direct and iterative + int refine(vector_type* rhs, vector_type* x); // for iterative refinement + + // we update the matrix once it changed + int updateMatrix(std::string format, int* ia, int* ja, double* a); + + LinSolverDirect& getFactorizationSolver(); + LinSolverDirect& getRefactorizationSolver(); + LinSolverIterative& getIterativeSolver(); + + real_type getVectorNorm(vector_type* rhs); + real_type getResidualNorm(vector_type* rhs, vector_type* x); + real_type getNormOfScaledResiduals(vector_type* rhs, vector_type* x); + + // Get solver parameters + const std::string getFactorizationMethod() const; + const std::string getRefactorizationMethod() const; + const std::string getSolveMethod() const; + const std::string getRefinementMethod() const; + const std::string getOrthogonalizationMethod() const; + + // Set solver parameters + void setFactorizationMethod(std::string method); + void setRefactorizationMethod(std::string method); + int setSolveMethod(std::string method); + void setRefinementMethod(std::string method, std::string gs = "cgs2"); + int setSketchingMethod(std::string method); + int 
setGramSchmidtMethod(std::string gs_method); + + private: + LinSolverDirect* factorizationSolver_{nullptr}; + LinSolverDirect* refactorizationSolver_{nullptr}; + LinSolverIterative* iterativeSolver_{nullptr}; + GramSchmidt* gs_{nullptr}; + + precond_type* preconditioner_{nullptr}; + + LinAlgWorkspaceCUDA* workspaceCuda_{nullptr}; + LinAlgWorkspaceHIP* workspaceHip_{nullptr}; + LinAlgWorkspaceCpu* workspaceCpu_{nullptr}; + + MatrixHandler* matrixHandler_{nullptr}; + VectorHandler* vectorHandler_{nullptr}; + + bool is_solve_on_device_{false}; + + matrix_type* L_{nullptr}; + matrix_type* U_{nullptr}; + + index_type* P_{nullptr}; + index_type* Q_{nullptr}; + + vector_type* resVector_{nullptr}; + + matrix::Sparse* A_{nullptr}; + + // Configuration parameters + std::string factorizationMethod_{"none"}; + std::string refactorizationMethod_{"none"}; + std::string solveMethod_{"none"}; + std::string precondition_method_{"none"}; + std::string irMethod_{"none"}; + std::string gsMethod_{"cgs2"}; + std::string sketching_method_{"count"}; ///< @todo move this to LinSolverIterative class + + std::string memspace_; }; } // namespace ReSolve diff --git a/resolve/cpu/CpuMemory.hpp b/resolve/cpu/CpuMemory.hpp index 96c9a6ebc..a955b542e 100644 --- a/resolve/cpu/CpuMemory.hpp +++ b/resolve/cpu/CpuMemory.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include namespace ReSolve @@ -8,7 +9,7 @@ namespace ReSolve { /** * @brief Class containing dummy functions when there is no GPU support. - * + * * @author Slaven Peles */ struct Cpu @@ -20,10 +21,10 @@ namespace ReSolve { // Nothing to synchronize } - + /** * @brief Dummy function to stand in when GPU support is not enabled. - * + * * @return Allways return success! */ static int getLastDeviceError() @@ -31,10 +32,10 @@ namespace ReSolve // not on device, nothing to get return 0; } - - /** + + /** * @brief Dummy function to notify us something is wrong. 
- * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -46,9 +47,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -61,9 +62,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -76,9 +77,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -91,9 +92,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -106,9 +107,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -121,9 +122,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. 
* @@ -146,4 +147,4 @@ namespace ReSolve }; // struct Cuda } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cpu/MemoryUtils.cpp b/resolve/cpu/MemoryUtils.cpp index 74944695f..d684f51d9 100644 --- a/resolve/cpu/MemoryUtils.cpp +++ b/resolve/cpu/MemoryUtils.cpp @@ -1,19 +1,19 @@ /** * @file MemoryUtils.cpp - * + * * This file includes MemoryUtils.tpp and specifies what functions to * instantiate from function templates. - * + * * @author Slaven Peles */ - #include #include #include #include +#define RESOLVE_FOOL_CLANG_FORMAT #include namespace ReSolve @@ -22,21 +22,21 @@ namespace ReSolve template int MemoryUtils::getLastDeviceError(); template int MemoryUtils::deleteOnDevice(void*); - template int MemoryUtils::allocateArrayOnDevice( real_type**, index_type); - template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); + template int MemoryUtils::allocateArrayOnDevice(real_type**, index_type); + template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); - template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); + template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); - template int MemoryUtils::setZeroArrayOnDevice( real_type*, index_type); + template int MemoryUtils::setZeroArrayOnDevice(real_type*, index_type); - template int MemoryUtils::setArrayToConstOnDevice( real_type*, real_type, index_type); + template int MemoryUtils::setArrayToConstOnDevice(real_type*, real_type, index_type); - template int MemoryUtils::copyArrayDeviceToHost( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); + template int MemoryUtils::copyArrayDeviceToHost(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayDeviceToDevice( real_type*, const real_type*, index_type); - template int 
MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); + template int MemoryUtils::copyArrayDeviceToDevice(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); -} + template int MemoryUtils::copyArrayHostToDevice(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); +} // namespace ReSolve diff --git a/resolve/cuda/CMakeLists.txt b/resolve/cuda/CMakeLists.txt index c574501fe..9657000ed 100644 --- a/resolve/cuda/CMakeLists.txt +++ b/resolve/cuda/CMakeLists.txt @@ -32,4 +32,3 @@ target_link_libraries(resolve_backend_cuda PUBLIC resolve_cuda) # install include headers install(FILES ${ReSolve_CUDA_HEADER_INSTALL} DESTINATION include/resolve/cuda) - diff --git a/resolve/cuda/CudaMemory.hpp b/resolve/cuda/CudaMemory.hpp index b5f888491..af51add1e 100644 --- a/resolve/cuda/CudaMemory.hpp +++ b/resolve/cuda/CudaMemory.hpp @@ -1,9 +1,10 @@ #pragma once -#include #include +#include #include + #include "cuda_check_errors.hpp" namespace ReSolve @@ -12,10 +13,10 @@ namespace ReSolve { /** * @brief Class containing wrappers for CUDA API functions. - * + * * All wrappers are implemented as static functions returning integer * error code from CUDA API functions. 
- * + * * @author Slaven Peles */ struct Cuda @@ -24,13 +25,13 @@ namespace ReSolve { cudaDeviceSynchronize(); } - + static int getLastDeviceError() { return static_cast(cudaGetLastError()); } - - /** + + /** * @brief deletes variable from device * * @param v - a variable on the device @@ -47,7 +48,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of array elements (int, size_t) - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -61,13 +62,13 @@ namespace ReSolve /** * @brief allocates buffer v onto device. - * + * * The difference from the array is that buffer size is required in bytes, * not number of elements. * * @param v - pointer to the buffer to be allocated on the device * @param n - size of the buffer in bytes - * + * * @tparam T - Buffer element data type type (typically void) * @tparam I - Buffer size type (typically size_t) * @@ -84,7 +85,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -102,7 +103,7 @@ namespace ReSolve * @param v - pointer to the array to be allocated on the device * @param c - value to set all array elements * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -115,7 +116,7 @@ namespace ReSolve return checkCudaErrors(0); } - /** + /** * @brief Copies array `src` from device to the array `dst` on the host. 
* * @param[in] n - size of src array @@ -165,8 +166,7 @@ namespace ReSolve { return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyHostToDevice)); } - }; - } + } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cuda/MemoryUtils.cu b/resolve/cuda/MemoryUtils.cu index bc17a0af0..00692a8aa 100644 --- a/resolve/cuda/MemoryUtils.cu +++ b/resolve/cuda/MemoryUtils.cu @@ -1,42 +1,42 @@ /** * @file MemoryUtils.cu - * + * * This file includes MemoryUtils.tpp and specifies what functions to * instantiate from function templates. - * + * * @author Slaven Peles */ - #include #include #include +#define RESOLVE_FOOL_CLANG_FORMAT #include namespace ReSolve { template void MemoryUtils::deviceSynchronize(); - template int MemoryUtils::getLastDeviceError(); - template int MemoryUtils::deleteOnDevice(void*); + template int MemoryUtils::getLastDeviceError(); + template int MemoryUtils::deleteOnDevice(void*); - template int MemoryUtils::allocateArrayOnDevice( real_type**, index_type); + template int MemoryUtils::allocateArrayOnDevice(real_type**, index_type); template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); - template int MemoryUtils::setZeroArrayOnDevice( real_type*, index_type); + template int MemoryUtils::setZeroArrayOnDevice(real_type*, index_type); - template int MemoryUtils::setArrayToConstOnDevice( real_type*, real_type, index_type); + template int MemoryUtils::setArrayToConstOnDevice(real_type*, real_type, index_type); - template int MemoryUtils::copyArrayDeviceToHost( real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayDeviceToHost(real_type*, const real_type*, index_type); template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayDeviceToDevice( real_type*, const real_type*, index_type); + template int 
MemoryUtils::copyArrayDeviceToDevice(real_type*, const real_type*, index_type); template int MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice( real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayHostToDevice(real_type*, const real_type*, index_type); template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index 2c196fcf4..09daa29ac 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -7,13 +7,16 @@ * */ -#include "cudaKernels.h" #include +#include "cudaKernels.h" -namespace ReSolve { - namespace cuda { - namespace kernels { +namespace ReSolve +{ + namespace cuda + { + namespace kernels + { /** * @brief Computes v^T * [u1 u2] where v is n x k multivector @@ -32,11 +35,11 @@ namespace ReSolve { __global__ void MassIPTwoVec(const real_type* __restrict__ u1, const real_type* __restrict__ u2, const real_type* __restrict__ v, - real_type* result, + real_type* result, const index_type k, const index_type N) { - index_type t = threadIdx.x; + index_type t = threadIdx.x; index_type bsize = blockDim.x; // assume T threads per thread block (and k reductions to be performed) @@ -44,17 +47,18 @@ namespace ReSolve { volatile __shared__ real_type s_tmp2[Tv5]; // map between thread index space and the problem index space - index_type j = blockIdx.x; - s_tmp1[t] = 0.0; - s_tmp2[t] = 0.0; + index_type j = blockIdx.x; + s_tmp1[t] = 0.0; + s_tmp2[t] = 0.0; index_type nn = t; - real_type can1, can2, cbn; + real_type can1, can2, cbn; - while(nn < N) { + while (nn < N) + { can1 = u1[nn]; can2 = u2[nn]; - cbn = v[N * j + nn]; + cbn = v[N * j + nn]; s_tmp1[t] += can1 * cbn; s_tmp2[t] += can2 * cbn; @@ -63,36 +67,43 @@ namespace ReSolve { __syncthreads(); - if(Tv5 >= 1024) { - if(t < 512) { + if (Tv5 >= 
1024) + { + if (t < 512) + { s_tmp1[t] += s_tmp1[t + 512]; s_tmp2[t] += s_tmp2[t + 512]; } __syncthreads(); } - if(Tv5 >= 512) { - if(t < 256) { + if (Tv5 >= 512) + { + if (t < 256) + { s_tmp1[t] += s_tmp1[t + 256]; s_tmp2[t] += s_tmp2[t + 256]; } __syncthreads(); } { - if(t < 128) { + if (t < 128) + { s_tmp1[t] += s_tmp1[t + 128]; s_tmp2[t] += s_tmp2[t + 128]; } __syncthreads(); } { - if(t < 64) { + if (t < 64) + { s_tmp1[t] += s_tmp1[t + 64]; s_tmp2[t] += s_tmp2[t + 64]; } __syncthreads(); } - if(t < 32) { + if (t < 32) + { s_tmp1[t] += s_tmp1[t + 32]; s_tmp2[t] += s_tmp2[t + 32]; @@ -111,13 +122,13 @@ namespace ReSolve { s_tmp1[t] += s_tmp1[t + 1]; s_tmp2[t] += s_tmp2[t + 1]; } - if(t == 0) { - result[blockIdx.x] = s_tmp1[0]; + if (t == 0) + { + result[blockIdx.x] = s_tmp1[0]; result[blockIdx.x + k] = s_tmp2[0]; } } - /** * @brief AXPY y = y - x*alpha where alpha is [k x 1], and x is [N x k] needed in 1 and 2 synch GMRES * @@ -130,24 +141,27 @@ namespace ReSolve { * @param[in] alpha - doble array, size [k x 1] */ template - __global__ void massAxpy3(index_type N, - index_type k, + __global__ void massAxpy3(index_type N, + index_type k, const real_type* x_data, - real_type* y_data, + real_type* y_data, const real_type* alpha) { - index_type i = blockIdx.x * blockDim.x + threadIdx.x; - index_type t = threadIdx.x; + index_type i = blockIdx.x * blockDim.x + threadIdx.x; + index_type t = threadIdx.x; __shared__ real_type s_alpha[Tmaxk]; - if(t < k) { + if (t < k) + { s_alpha[t] = alpha[t]; } __syncthreads(); - if(i < N) { + if (i < N) + { real_type temp = 0.0; - for(index_type j = 0; j < k; ++j) { + for (index_type j = 0; j < k; ++j) + { temp += x_data[j * N + i] * s_alpha[j]; } y_data[i] -= temp; @@ -163,22 +177,25 @@ namespace ReSolve { * @param[in] a_val - values (CSR storage) * @param[out] result - array size [n x 1] containing sums of values in each row. 
*/ - __global__ void matrixInfNormPart1(const index_type n, - const index_type nnz, + __global__ void matrixInfNormPart1(const index_type n, + const index_type nnz, const index_type* a_ia, - const real_type* a_val, - real_type* result) + const real_type* a_val, + real_type* result) { index_type idx = blockIdx.x * blockDim.x + threadIdx.x; - while (idx < n) { + while (idx < n) + { real_type sum = 0.0; - for (index_type i = a_ia[idx]; i < a_ia[idx + 1]; ++i) { + for (index_type i = a_ia[idx]; i < a_ia[idx + 1]; ++i) + { sum = sum + fabs(a_val[i]); } - result[idx] = sum; - idx += (blockDim.x * gridDim.x); + result[idx] = sum; + idx += (blockDim.x * gridDim.x); } } + /** * @brief Scales a csr matrix on the left by a diagonal matrix * @@ -189,25 +206,27 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void leftScale(index_type n, + __global__ void leftScale(index_type n, const index_type* a_row_ptr, - real_type* a_val, - const real_type* d_val) + real_type* a_val, + const real_type* d_val) { // Get row index from thread and block indices index_type row = blockIdx.x * blockDim.x + threadIdx.x; // Check if the thread's row is within matrix bounds - if (row < n) { + if (row < n) + { // Get the start and end positions for this row in the CSR format index_type row_start = a_row_ptr[row]; - index_type row_end = a_row_ptr[row + 1]; + index_type row_end = a_row_ptr[row + 1]; // Get the scaling factor for this row from the diagonal matrix real_type scale = d_val[row]; // Scale all non-zero elements in this row - for (index_type i = row_start; i < row_end; i++) { + for (index_type i = row_start; i < row_end; i++) + { a_val[i] *= scale; } } @@ -224,23 +243,25 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. 
*/ - __global__ void rightScale(index_type n, + __global__ void rightScale(index_type n, const index_type* a_row_ptr, const index_type* a_col_ind, - real_type* a_val, - const real_type* d_val) + real_type* a_val, + const real_type* d_val) { // Get row index from thread and block indices index_type row = blockIdx.x * blockDim.x + threadIdx.x; // Check if the thread's row is within matrix bounds - if (row < n) { + if (row < n) + { // Get the start and end positions for this row in the CSR format index_type row_start = a_row_ptr[row]; - index_type row_end = a_row_ptr[row + 1]; + index_type row_end = a_row_ptr[row + 1]; // Scale all non-zero elements in this row - for (index_type i = row_start; i < row_end; i++) { + for (index_type i = row_start; i < row_end; i++) + { a_val[i] *= d_val[a_col_ind[i]]; } } @@ -266,12 +287,12 @@ namespace ReSolve { * value of Tv5? * @todo Should we use dynamic shared memory here instead? */ - void mass_inner_product_two_vectors(index_type n, - index_type i, + void mass_inner_product_two_vectors(index_type n, + index_type i, const real_type* vec1, const real_type* vec2, const real_type* mvec, - real_type* result) + real_type* result) { kernels::MassIPTwoVec<<>>(vec1, vec2, mvec, result, i, n); } @@ -300,15 +321,15 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - void leftScale(index_type n, - const index_type* a_row_ptr, - real_type* a_val, - const real_type* d_val) + void leftScale(index_type n, + const index_type* a_row_ptr, + real_type* a_val, + const real_type* d_val) { // Define block size and number of blocks const int block_size = 1; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::leftScale<<>>(n, a_row_ptr, a_val, d_val); } @@ -324,15 +345,15 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. 
*/ - void rightScale(index_type n, - const index_type* a_row_ptr, - const index_type* a_col_ind, - real_type* a_val, - const real_type* d_val) + void rightScale(index_type n, + const index_type* a_row_ptr, + const index_type* a_col_ind, + real_type* a_val, + const real_type* d_val) { // Define block size and number of blocks const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::rightScale<<>>(n, a_row_ptr, a_col_ind, a_val, d_val); } @@ -348,11 +369,11 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - void matrix_row_sums(index_type n, - index_type nnz, - const index_type* a_ia, - const real_type* a_val, - real_type* result) + void matrix_row_sums(index_type n, + index_type nnz, + const index_type* a_ia, + const real_type* a_val, + real_type* result) { kernels::matrixInfNormPart1<<<1000, 1024>>>(n, nnz, a_ia, a_val, result); } diff --git a/resolve/cuda/cudaKernels.h b/resolve/cuda/cudaKernels.h index 8781d2203..037a3ae8e 100644 --- a/resolve/cuda/cudaKernels.h +++ b/resolve/cuda/cudaKernels.h @@ -11,34 +11,36 @@ #include -namespace ReSolve { - namespace cuda { - void mass_inner_product_two_vectors(index_type n, - index_type i, +namespace ReSolve +{ + namespace cuda + { + void mass_inner_product_two_vectors(index_type n, + index_type i, const real_type* vec1, const real_type* vec2, const real_type* mvec, - real_type* result); + real_type* result); void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha); - void leftScale(index_type n, + void leftScale(index_type n, const index_type* a_row_ptr, - real_type* a_val, - const real_type* diag); + real_type* a_val, + const real_type* diag); - void rightScale(index_type n, + void rightScale(index_type n, const index_type* a_row_ptr, const index_type* a_col_idx, - real_type* a_val, - const real_type* diag); + real_type* 
a_val, + const real_type* diag); - //needed for matrix inf nrm - void matrix_row_sums(index_type n, - index_type nnz, + // needed for matrix inf nrm + void matrix_row_sums(index_type n, + index_type nnz, const index_type* a_ia, - const real_type* a_val, - real_type* result); + const real_type* a_val, + real_type* result); } // namespace cuda } // namespace ReSolve diff --git a/resolve/cuda/cudaSketchingKernels.cu b/resolve/cuda/cudaSketchingKernels.cu index 1bb48ce0d..1808da3a6 100644 --- a/resolve/cuda/cudaSketchingKernels.cu +++ b/resolve/cuda/cudaSketchingKernels.cu @@ -3,13 +3,13 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief CUDA implementation of random sketching kernels. * @date 2023-12-08 - * - * + * + * */ #include -#include "cudaSketchingKernels.h" +#include "cudaSketchingKernels.h" namespace ReSolve { @@ -20,27 +20,29 @@ namespace ReSolve /** * @brief For count sketch in sketching random method - * + * * @param[in] n - number of entries in input vector * @param[in] k - number of entries in output vector (k < n) * @param[in] labels - array size [n x 1] containing integers from 0 to k-1, assigned randomly. * @param[in] flip - array size [n x 1] containing values `1` and `-1` - * @param[in] input - input vector, size [n x 1] + * @param[in] input - input vector, size [n x 1] * @param[out] output - output vector, size [k x 1] - * + * * @pre _output_ vector must be allocated and initialized with 0s prior to calling this kernel. 
*/ - __global__ void count_sketch(const index_type n, - const index_type k, + __global__ void count_sketch(const index_type n, + const index_type k, const index_type* labels, const index_type* flip, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { - index_type idx = blockIdx.x * blockDim.x + threadIdx.x; - while (idx < n) { + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + while (idx < n) + { real_type val = input[idx]; - if (flip[idx] != 1) { + if (flip[idx] != 1) + { val *= -1.0; } atomicAdd(&output[labels[idx]], val); @@ -50,44 +52,49 @@ namespace ReSolve /** * @brief Walsh-Hadamard transform (select) - * + * * @param[in] k - * @param[in] perm - * @param[in] input - - * @param[out] output - + * @param[out] output - */ - __global__ void select(const index_type k, + __global__ void select(const index_type k, const index_type* perm, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { - index_type idx = blockIdx.x * blockDim.x + threadIdx.x; - while (idx < k) { - output[idx] = input[perm[idx]]; - idx += blockDim.x * gridDim.x; + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + while (idx < k) + { + output[idx] = input[perm[idx]]; + idx += blockDim.x * gridDim.x; } } /** * @brief Walsh-Hadamard transform (scale) - * + * * @param[in] n - * @param[in] D - * @param[in] x - * @param[out] y - */ - __global__ void scaleByD(const index_type n, - const index_type* D, - const real_type* x, - real_type* y) + __global__ void scaleByD(const index_type n, + const index_type* D, + const real_type* x, + real_type* y) { - index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; - while (idx < n) { + while (idx < n) + { - if (D[idx] == 1) { + if (D[idx] == 1) + { y[idx] = x[idx]; - } else { + } + else + { y[idx] = (-1.0) * x[idx]; } @@ -98,15 +105,15 @@ namespace ReSolve /** * @brief Single in-global memory radix-4 Fast 
Walsh Transform pass * (for strides exceeding elementary vector size). - * - * @param d_Output - + * + * @param d_Output - * @param d_Input - * @param stride - */ - __global__ void fwtBatch2Kernel(real_type* d_Output, real_type* d_Input, index_type stride) + __global__ void fwtBatch2Kernel(real_type* d_Output, real_type* d_Input, index_type stride) { const index_type pos = blockIdx.x * blockDim.x + threadIdx.x; - const index_type N = blockDim.x * gridDim.x * 4; + const index_type N = blockDim.x * gridDim.x * 4; real_type* d_Src = d_Input + blockIdx.y * N; real_type* d_Dst = d_Output + blockIdx.y * N; @@ -123,52 +130,53 @@ namespace ReSolve real_type D3 = d_Src[i3]; real_type T; - T = D0; - D0 = D0 + D2; - D2 = T - D2; - T = D1; - D1 = D1 + D3; - D3 = T - D3; - T = D0; + T = D0; + D0 = D0 + D2; + D2 = T - D2; + T = D1; + D1 = D1 + D3; + D3 = T - D3; + T = D0; d_Dst[i0] = D0 + D1; d_Dst[i1] = T - D1; - T = D2; + T = D2; d_Dst[i2] = D2 + D3; d_Dst[i3] = T - D3; } - /** - * @brief - * + * @brief + * * @param d_Output - * @param d_Input - * @param log2N - - * + * * @todo `d_Input` should be `const` parameter. 
- * + * */ - __global__ void fwtBatch1Kernel(real_type* d_Output, real_type* d_Input, index_type log2N) + __global__ void fwtBatch1Kernel(real_type* d_Output, real_type* d_Input, index_type log2N) { // Handle to thread block group - cooperative_groups::thread_block cta = cooperative_groups::this_thread_block(); - const index_type N = 1 << log2N; - const index_type base = blockIdx.x << log2N; + cooperative_groups::thread_block cta = cooperative_groups::this_thread_block(); + const index_type N = 1 << log2N; + const index_type base = blockIdx.x << log2N; //(2 ** 11) * 4 bytes == 8KB -- maximum s_data[] size for G80 extern __shared__ real_type s_data[]; - real_type* d_Src = d_Input + base; - real_type* d_Dst = d_Output + base; + real_type* d_Src = d_Input + base; + real_type* d_Dst = d_Output + base; - for (index_type pos = threadIdx.x; pos < N; pos += blockDim.x) { + for (index_type pos = threadIdx.x; pos < N; pos += blockDim.x) + { s_data[pos] = d_Src[pos]; } // Main radix-4 stages const index_type pos = threadIdx.x; - for (index_type stride = N >> 2; stride > 0; stride >>= 2) { + for (index_type stride = N >> 2; stride > 0; stride >>= 2) + { index_type lo = pos & (stride - 1); index_type i0 = ((pos - lo) << 2) + lo; index_type i1 = i0 + stride; @@ -182,125 +190,130 @@ namespace ReSolve real_type D3 = s_data[i3]; real_type T; - T = D0; + T = D0; D0 = D0 + D2; D2 = T - D2; - T = D1; + T = D1; D1 = D1 + D3; D3 = T - D3; - T = D0; + + T = D0; s_data[i0] = D0 + D1; s_data[i1] = T - D1; - T = D2; + + T = D2; s_data[i2] = D2 + D3; s_data[i3] = T - D3; } // Do single radix-2 stage for odd power of two - if (log2N & 1) { + if (log2N & 1) + { cooperative_groups::sync(cta); - for (index_type pos = threadIdx.x; pos < N / 2; pos += blockDim.x) { + for (index_type pos = threadIdx.x; pos < N / 2; pos += blockDim.x) + { index_type i0 = pos << 1; index_type i1 = i0 + 1; real_type D0 = s_data[i0]; real_type D1 = s_data[i1]; - s_data[i0] = D0 + D1; - s_data[i1] = D0 - D1; + 
s_data[i0] = D0 + D1; + s_data[i1] = D0 - D1; } } cooperative_groups::sync(cta); - for (index_type pos = threadIdx.x; pos < N; pos += blockDim.x) { + for (index_type pos = threadIdx.x; pos < N; pos += blockDim.x) + { d_Dst[pos] = s_data[pos]; } } } // namespace kernels - // // Kernel wrappers // /** - * @brief Kernel wrapper for - * + * @brief Kernel wrapper for + * * @param[in] n - (unsketched ) vector length * @param[in] k - sketched vector length (_n_ >> _k_) - * @param[in] labels - array of length _n_, containing integers with values between 0 and k-1 - * @param[in] flip - array with values 1 and -1 + * @param[in] labels - array of length _n_, containing integers with values between 0 and k-1 + * @param[in] flip - array with values 1 and -1 * @param[in] input - input vector, length _n_ - * @param[out] output - output vector, length _k_ -1 - * + * @param[out] output - output vector, length _k_ -1 + * * @todo Decide how to allow user to configure grid and block sizes. */ - void count_sketch_theta(index_type n, - index_type k, + void count_sketch_theta(index_type n, + index_type k, const index_type* labels, const index_type* flip, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { kernels::count_sketch<<<10000, 1024>>>(n, k, labels, flip, input, output); } /** * @brief Wrapper for `select` kernel, part of Walsh-Hadamard transform - * + * * @param[in] k - - * @param[in] perm - + * @param[in] perm - * @param[in] input - - * @param[out] output - - * + * @param[out] output - + * * @todo Decide how to allow user to configure grid and block sizes. 
*/ - void FWHT_select(index_type k, + void FWHT_select(index_type k, const index_type* perm, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { kernels::select<<<1000, 1024>>>(k, perm, input, output); } /** * @brief Wrapper for `scale` kernel, part of Walsh-Hadamard transform - * + * * @param[in] n - * @param[in] D - * @param[in] x - * @param[out] y - - * + * * @todo Decide how to allow user to configure grid and block sizes. */ - void FWHT_scaleByD(index_type n, - const index_type* D, - const real_type* x, - real_type* y) + void FWHT_scaleByD(index_type n, + const index_type* D, + const real_type* x, + real_type* y) { kernels::scaleByD<<<1000, 1024>>>(n, D, x, y); } /** - * @brief - * + * @brief + * * @param[in] M - - * @param[in] log2N - - * @param[out] d_Data - - * + * @param[in] log2N - + * @param[out] d_Data - + * * @todo Decide if and how user should configure log2size, thread_n, etc. */ void FWHT(index_type M, index_type log2N, real_type* d_Data) { const index_type ELEMENTARY_LOG2SIZE = 11; - const index_type THREAD_N = 1024; - index_type N = 1 << log2N; - dim3 grid((1 << log2N) / (4 * THREAD_N), M, 1); + const index_type THREAD_N = 1024; + index_type N = 1 << log2N; + dim3 grid((1 << log2N) / (4 * THREAD_N), M, 1); - for (; log2N > ELEMENTARY_LOG2SIZE; log2N -= 2, N >>= 2, M <<= 2) { + for (; log2N > ELEMENTARY_LOG2SIZE; log2N -= 2, N >>= 2, M <<= 2) + { kernels::fwtBatch2Kernel<<>>(d_Data, d_Data, N / 4); } diff --git a/resolve/cuda/cudaSketchingKernels.h b/resolve/cuda/cudaSketchingKernels.h index bd1b8d85f..3a99a1e85 100644 --- a/resolve/cuda/cudaSketchingKernels.h +++ b/resolve/cuda/cudaSketchingKernels.h @@ -1,11 +1,11 @@ /** * @file cudaSketchingKernels.h * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) - * + * * @brief Contains prototypes of CUDA random sketching kernels. - * + * * @note These kernels will be used in CUDA specific code, only. 
- * + * */ #pragma once @@ -16,23 +16,23 @@ namespace ReSolve namespace cuda { // needed for rand solver - void count_sketch_theta(index_type n, - index_type k, + void count_sketch_theta(index_type n, + index_type k, const index_type* labels, const index_type* flip, - const real_type* input, - real_type* output); + const real_type* input, + real_type* output); - void FWHT_select(index_type k, - const index_type* perm, - const real_type* input, - real_type* output); + void FWHT_select(index_type k, + const index_type* perm, + const real_type* input, + real_type* output); - void FWHT_scaleByD(index_type n, - const index_type* D, - const real_type* x, - real_type* y); + void FWHT_scaleByD(index_type n, + const index_type* D, + const real_type* x, + real_type* y); - void FWHT(index_type M, index_type log2N, real_type* d_Data); - } + void FWHT(index_type M, index_type log2N, real_type* d_Data); + } // namespace cuda } // namespace ReSolve diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 85e0f898c..0a879c61d 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -9,12 +9,13 @@ */ #include -#include #include +#include namespace ReSolve { - namespace cuda { + namespace cuda + { namespace kernels { @@ -31,7 +32,7 @@ namespace ReSolve __global__ void set_const(index_type n, real_type val, real_type* arr) { index_type i = blockIdx.x * blockDim.x + threadIdx.x; - if(i < n) + if (i < n) { arr[i] = val; } @@ -50,7 +51,7 @@ namespace ReSolve __global__ void addConst(index_type n, real_type val, real_type* arr) { index_type i = blockIdx.x * blockDim.x + threadIdx.x; - if(i < n) + if (i < n) { arr[i] += val; } @@ -65,15 +66,16 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. 
*/ - __global__ void scale(index_type n, - const real_type* d_val, - real_type* vec) + __global__ void scale(index_type n, + const real_type* d_val, + real_type* vec) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; // Check if the index is within bounds - if (idx < n) { + if (idx < n) + { // Scale the vector element by the corresponding diagonal value vec[idx] *= d_val[idx]; } @@ -85,7 +87,7 @@ namespace ReSolve { index_type num_blocks; index_type block_size = 512; - num_blocks = (n + block_size - 1) / block_size; + num_blocks = (n + block_size - 1) / block_size; kernels::set_const<<>>(n, val, arr); } @@ -93,7 +95,7 @@ namespace ReSolve { index_type num_blocks; index_type block_size = 512; - num_blocks = (n + block_size - 1) / block_size; + num_blocks = (n + block_size - 1) / block_size; kernels::addConst<<>>(n, val, arr); } @@ -106,13 +108,13 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. 
*/ - void scale(index_type n, - const real_type* diag, - real_type* vec) + void scale(index_type n, + const real_type* diag, + real_type* vec) { // Define block size and number of blocks const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::scale<<>>(n, diag, vec); } diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index ff6b43c1e..f8bef0d5b 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -13,9 +13,10 @@ namespace ReSolve { - namespace cuda { + namespace cuda + { void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - } -} + } // namespace cuda +} // namespace ReSolve diff --git a/resolve/cuda/cuda_check_errors.hpp b/resolve/cuda/cuda_check_errors.hpp index 00a2029e1..bbebed6b4 100644 --- a/resolve/cuda/cuda_check_errors.hpp +++ b/resolve/cuda/cuda_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file cuda_check_errors.hpp - * + * * Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. 
- * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -12,17 +12,19 @@ #include template -int check(T result, - char const *const func, - const char *const file, - int const line) +int check(T result, + char const* const func, + const char* const file, + int const line) { - if (result) { + if (result) + { ReSolve::io::Logger::error() << "CUDA error in function " - << func << " at " << file << ":" << line + << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; } -#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) \ No newline at end of file + +#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) diff --git a/resolve/cusolver_defs.hpp b/resolve/cusolver_defs.hpp index c9db034cc..fbadfa44b 100644 --- a/resolve/cusolver_defs.hpp +++ b/resolve/cusolver_defs.hpp @@ -3,7 +3,7 @@ * @file cusolver_defs.hpp * * @author Kasia Swirydowicz , PNNL - * + * * Contains prototypes of cuSOLVER functions not in public API. 
* */ @@ -11,95 +11,96 @@ #ifndef CUSOLVERDEFS_H #define CUSOLVERDEFS_H -#include "cusparse.h" -#include "cusolverSp.h" #include -#include #include -#include "cusolverSp_LOWLEVEL_PREVIEW.h" + +#include #include "cusolverRf.h" +#include "cusolverSp.h" +#include "cusolverSp_LOWLEVEL_PREVIEW.h" +#include "cusparse.h" -extern "C" { +extern "C" +{ /* - * prototype not in public header file + * prototype not in public header file */ struct csrgluInfo; - typedef struct csrgluInfo *csrgluInfo_t; + typedef struct csrgluInfo* csrgluInfo_t; cusolverStatus_t CUSOLVERAPI - cusolverSpCreateGluInfo(csrgluInfo_t *info); + cusolverSpCreateGluInfo(csrgluInfo_t* info); cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSetup(cusolverSpHandle_t handle, - int m, + cusolverSpDgluSetup(cusolverSpHandle_t handle, + int m, /* A can be base-0 or base-1 */ - int nnzA, + int nnzA, const cusparseMatDescr_t descrA, - const int* h_csrRowPtrA, - const int* h_csrColIndA, - const int* h_P, /* base-0 */ - const int* h_Q, /* base-0 */ + const int* h_csrRowPtrA, + const int* h_csrColIndA, + const int* h_P, /* base-0 */ + const int* h_Q, /* base-0 */ /* M can be base-0 or base-1 */ - int nnzM, + int nnzM, const cusparseMatDescr_t descrM, - const int* h_csrRowPtrM, - const int* h_csrColIndM, - csrgluInfo_t info); + const int* h_csrRowPtrM, + const int* h_csrColIndM, + csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI cusolverSpDgluBufferSize(cusolverSpHandle_t handle, - csrgluInfo_t info, - size_t* pBufferSize); + csrgluInfo_t info, + size_t* pBufferSize); cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, - csrgluInfo_t info, - void* workspace); + csrgluInfo_t info, + void* workspace); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluReset(cusolverSpHandle_t handle, - int m, + cusolverSpDgluReset(cusolverSpHandle_t handle, + int m, /* A is original matrix */ - int nnzA, + int nnzA, const cusparseMatDescr_t 
descr_A, - const double* d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - csrgluInfo_t info); + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, - csrgluInfo_t info, - void *workspace); + csrgluInfo_t info, + void* workspace); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSolve(cusolverSpHandle_t handle, - int m, + cusolverSpDgluSolve(cusolverSpHandle_t handle, + int m, /* A is original matrix */ - int nnzA, + int nnzA, const cusparseMatDescr_t descr_A, - const double *d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const double* d_b0, /* right hand side */ - double* d_x, /* left hand side */ - int* ite_refine_succ, - double* r_nrm_inf_ptr, - csrgluInfo_t info, - void* workspace); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDnrminf(cusolverSpHandle_t handle, - int n, - const double *x, - double* result, /* |x|_inf, host */ - void* d_work /* at least 8192 bytes */ - ); + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + const double* d_b0, /* right hand side */ + double* d_x, /* left hand side */ + int* ite_refine_succ, + double* r_nrm_inf_ptr, + csrgluInfo_t info, + void* workspace); + cusolverStatus_t CUSOLVERAPI + cusolverSpDnrminf(cusolverSpHandle_t handle, + int n, + const double* x, + double* result, /* |x|_inf, host */ + void* d_work /* at least 8192 bytes */ + ); } // extern "C" #endif // CUSOLVERDEFS_H diff --git a/resolve/hip/CMakeLists.txt b/resolve/hip/CMakeLists.txt index ca308c4b7..06f5cde8b 100644 --- a/resolve/hip/CMakeLists.txt +++ b/resolve/hip/CMakeLists.txt @@ -32,4 +32,3 @@ target_link_libraries(resolve_backend_hip PUBLIC resolve_hip) # install include headers install(FILES ${ReSolve_HIP_HEADER_INSTALL} DESTINATION include/resolve/hip) - diff --git a/resolve/hip/HipMemory.hpp b/resolve/hip/HipMemory.hpp index 1ac344d4d..372da4f25 100644 --- 
a/resolve/hip/HipMemory.hpp +++ b/resolve/hip/HipMemory.hpp @@ -1,9 +1,10 @@ #pragma once #include -#include +#include #include + #include "hip_check_errors.hpp" namespace ReSolve @@ -12,10 +13,10 @@ namespace ReSolve { /** * @brief Class containing wrappers for CUDA API functions. - * + * * All wrappers are implemented as static functions returning integer * error code from CUDA API functions. - * + * * @author Slaven Peles */ struct Hip @@ -24,13 +25,13 @@ namespace ReSolve { hipDeviceSynchronize(); } - + static int getLastDeviceError() { return static_cast(hipGetLastError()); } - - /** + + /** * @brief deletes variable from device * * @param v - a variable on the device @@ -47,7 +48,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of array elements (int, size_t) - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -61,13 +62,13 @@ namespace ReSolve /** * @brief allocates buffer v onto device. - * + * * The difference from the array is that buffer size is required in bytes, * not number of elements. * * @param v - pointer to the buffer to be allocated on the device * @param n - size of the buffer in bytes - * + * * @tparam T - Buffer element data type type (typically void) * @tparam I - Buffer size type (typically size_t) * @@ -84,7 +85,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -102,7 +103,7 @@ namespace ReSolve * @param v - pointer to the array to be allocated on the device * @param c - value to set all array elements * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -115,7 +116,7 @@ namespace ReSolve return checkHipErrors(0); } - /** + /** * @brief Copies array `src` from device to the array `dst` on the host. 
* * @param[in] n - size of src array @@ -165,8 +166,7 @@ namespace ReSolve { return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyHostToDevice)); } - }; - } + } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/hip/hipKernels.h b/resolve/hip/hipKernels.h index 06b211627..4308ac589 100644 --- a/resolve/hip/hipKernels.h +++ b/resolve/hip/hipKernels.h @@ -12,8 +12,10 @@ #include -namespace ReSolve { - namespace hip { +namespace ReSolve +{ + namespace hip + { void mass_inner_product_two_vectors(index_type n, index_type i, real_type* vec1, @@ -22,40 +24,39 @@ namespace ReSolve { real_type* result); void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha); - void leftScale(index_type n, + void leftScale(index_type n, const index_type* a_row_ptr, - real_type* a_val, - const real_type* diag); + real_type* a_val, + const real_type* diag); - void rightScale(index_type n, + void rightScale(index_type n, const index_type* a_row_ptr, const index_type* a_col_idx, - real_type* a_val, - const real_type* diag); + real_type* a_val, + const real_type* diag); - //needed for matrix inf nrm - void matrix_row_sums(index_type n, - index_type nnz, + // needed for matrix inf nrm + void matrix_row_sums(index_type n, + index_type nnz, index_type* a_ia, - real_type* a_val, - real_type* result); + real_type* a_val, + real_type* result); // needed for triangular solve - void permuteVectorP(index_type n, + void permuteVectorP(index_type n, index_type* perm_vector, - real_type* vec_in, - real_type* vec_out); + real_type* vec_in, + real_type* vec_out); - void permuteVectorQ(index_type n, + void permuteVectorQ(index_type n, index_type* perm_vector, - real_type* vec_in, - real_type* vec_out); - + real_type* vec_in, + real_type* vec_out); void vector_inf_norm(index_type n, real_type* input, - real_type * buffer, + real_type* buffer, real_type* result); } // namespace hip } // namespace ReSolve diff --git 
a/resolve/hip/hipSketchingKernels.h b/resolve/hip/hipSketchingKernels.h index c1c11f56e..b4a56be95 100644 --- a/resolve/hip/hipSketchingKernels.h +++ b/resolve/hip/hipSketchingKernels.h @@ -3,9 +3,9 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Contains prototypes of HIP random sketching kernels. * @date 2023-12-08 - * + * * @note These kernels will be used in HIP specific code, only. - * + * */ #pragma once @@ -17,23 +17,23 @@ namespace ReSolve namespace hip { // needed for rand solver - void count_sketch_theta(index_type n, - index_type k, + void count_sketch_theta(index_type n, + index_type k, index_type* labels, index_type* flip, - real_type* input, - real_type* output); + real_type* input, + real_type* output); - void FWHT_select(index_type k, - index_type* perm, - real_type* input, - real_type* output); + void FWHT_select(index_type k, + index_type* perm, + real_type* input, + real_type* output); - void FWHT_scaleByD(index_type n, - index_type* D, - real_type* x, - real_type* y); + void FWHT_scaleByD(index_type n, + index_type* D, + real_type* x, + real_type* y); - void FWHT(index_type M, index_type log2N, real_type* d_Data); + void FWHT(index_type M, index_type log2N, real_type* d_Data); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index 7a2fcfb91..f5790017d 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -18,5 +18,5 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - } -} + } // namespace hip +} // namespace ReSolve diff --git a/resolve/hip/hip_check_errors.hpp b/resolve/hip/hip_check_errors.hpp index 1f483d35a..06b9e422c 100644 --- a/resolve/hip/hip_check_errors.hpp +++ b/resolve/hip/hip_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file hip_check_errors.hpp - * + * * 
Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. - * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -12,17 +12,19 @@ #include template -int check(T result, - char const *const func, - const char *const file, - int const line) +int check(T result, + char const* const func, + const char* const file, + int const line) { - if (result) { + if (result) + { ReSolve::io::Logger::error() << "HIP error in function " - << func << " at " << file << ":" << line + << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; } + #define checkHipErrors(val) check((val), #val, __FILE__, __LINE__) diff --git a/resolve/lusol/lusol.hpp b/resolve/lusol/lusol.hpp index 265d6c7a5..9bd83e26a 100644 --- a/resolve/lusol/lusol.hpp +++ b/resolve/lusol/lusol.hpp @@ -4,14 +4,15 @@ // TODO: should we attach documentation comments to these or is there no point? -extern "C" { +extern "C" +{ void lu1fac(ReSolve::index_type* m, ReSolve::index_type* n, ReSolve::index_type* nelem, ReSolve::index_type* lena, ReSolve::index_type* luparm, - ReSolve::real_type* parmlu, - ReSolve::real_type* a, + ReSolve::real_type* parmlu, + ReSolve::real_type* a, ReSolve::index_type* indc, ReSolve::index_type* indr, ReSolve::index_type* p, @@ -24,18 +25,18 @@ extern "C" { ReSolve::index_type* iqloc, ReSolve::index_type* ipinv, ReSolve::index_type* iqinv, - ReSolve::real_type* w, + ReSolve::real_type* w, ReSolve::index_type* inform); void lu6sol(ReSolve::index_type* mode, ReSolve::index_type* m, ReSolve::index_type* n, - ReSolve::real_type* v, - ReSolve::real_type* w, + ReSolve::real_type* v, + ReSolve::real_type* w, ReSolve::index_type* lena, ReSolve::index_type* luparm, - ReSolve::real_type* parmlu, - ReSolve::real_type* a, + ReSolve::real_type* parmlu, + ReSolve::real_type* a, ReSolve::index_type* indc, ReSolve::index_type* indr, ReSolve::index_type* p, diff --git a/resolve/matrix/Coo.cpp 
b/resolve/matrix/Coo.cpp index 23c3afac7..3e883a285 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -1,11 +1,11 @@ -#include // <-- includes memcpy -#include -#include +#include "Coo.hpp" + #include +#include // <-- includes memcpy +#include +#include #include -#include "Coo.hpp" - namespace ReSolve { @@ -16,7 +16,8 @@ namespace ReSolve sparse_format_ = TRIPLET; } - matrix::Coo::Coo(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Coo::Coo(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = TRIPLET; } @@ -24,8 +25,9 @@ namespace ReSolve matrix::Coo::Coo(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = TRIPLET; } @@ -33,14 +35,14 @@ namespace ReSolve /** * @brief Hijacking constructor */ - matrix::Coo::Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + matrix::Coo::Coo(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst) : Sparse(n, m, nnz, symmetric, expanded) @@ -48,81 +50,95 @@ namespace ReSolve sparse_format_ = TRIPLET; int control = -1; - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) { control = 0;} - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)){ control = 1;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) { control = 2;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)){ control = 3;} + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) + { + control = 0; + } + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)) + { + control = 1; + } + 
if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) + { + control = 2; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)) + { + control = 3; + } switch (control) { - case 0: // cpu->cpu - // Set host data - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_values_ = true; - owns_cpu_sparsity_pattern_ = true; - // Make sure there is no device data. - if (d_row_data_ || d_col_data_ || d_val_data_) { - out::error() << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - // Set device data and copy it to host - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_values_ = true; - owns_gpu_sparsity_pattern_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - // Set host data and copy it to device - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_values_ = true; - owns_cpu_sparsity_pattern_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - // Set device data - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_values_ = true; - owns_gpu_sparsity_pattern_ = true; - // Make sure there is no device data. - if (h_row_data_ || h_col_data_ || h_val_data_) { - out::error() << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - out::error() << "Coo constructor failed! 
" - << "Possible bug in memory spaces setting.\n"; - break; + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_values_ = true; + owns_cpu_sparsity_pattern_ = true; + // Make sure there is no device data. + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_values_ = true; + owns_gpu_sparsity_pattern_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_values_ = true; + owns_cpu_sparsity_pattern_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_values_ = true; + owns_gpu_sparsity_pattern_ = true; + // Make sure there is no device data. + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + out::error() << "Coo constructor failed! 
" + << "Possible bug in memory spaces setting.\n"; + break; } } @@ -134,13 +150,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -148,13 +165,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -162,99 +180,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Coo::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Coo::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - //four cases (for now) + // four cases (for now) index_type nnz_current = nnz_; setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)){ control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)){ control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) 
== (h_col_data_ == nullptr)) && - "In Coo::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - this->h_row_data_ = new index_type[nnz_current]; - this->h_col_data_ = new index_type[nnz_current]; + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) + { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Coo::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new index_type[nnz_current]; + this->h_col_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Coo::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Coo::copyDataFrom one of device row or column data is null!\n"); - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ 
== nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + 
mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } - int matrix::Coo::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Coo::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -266,9 +302,10 @@ namespace ReSolve int matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_row_data_ = new index_type[nnz_current]; std::fill(h_row_data_, h_row_data_ + nnz_current, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -276,16 +313,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { + if (memspace == memory::DEVICE) + { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; } return -1; @@ -306,65 +344,72 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Coo::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) { - out::error() << "Coo::syncData is trying to sync host, 
but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Coo::syncData is trying to sync host with device, but device is out of date!\n" - << "See Coo::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[nnz_]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Coo::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) { - out::error() << "Coo::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Coo::syncData is trying to sync device with host, but host is out of date!\n" - << "See Coo::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + switch (memspace) + { + case HOST: + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Coo::syncData one of host row or column data is 
null!\n"); + + if (h_data_updated_) + { + out::error() << "Coo::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + out::error() << "Coo::syncData is trying to sync host with device, but device is out of date!\n" + << "See Coo::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new index_type[nnz_]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Coo::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + out::error() << "Coo::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); return 1; + } + if (!h_data_updated_) + { + out::error() << "Coo::syncData is trying to sync device with host, but host is out of date!\n" + << "See Coo::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } @@ -376,7 +421,8 @@ 
namespace ReSolve void matrix::Coo::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(int i = 0; i < nnz_; ++i) { + for (int i = 0; i < nnz_; ++i) + { out << h_row_data_[i] + indexing_base << " " << h_col_data_[i] + indexing_base << " " << h_val_data_[i] << "\n"; diff --git a/resolve/matrix/Coo.hpp b/resolve/matrix/Coo.hpp index aca181e56..7f37cbd2e 100644 --- a/resolve/matrix/Coo.hpp +++ b/resolve/matrix/Coo.hpp @@ -1,43 +1,46 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace matrix { - - class Coo : public Sparse +namespace ReSolve +{ + namespace matrix { + + class Coo : public Sparse + { public: Coo(); Coo(index_type n, index_type m, index_type nnz); - Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); Coo(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + bool symmetric, + bool expanded); + Coo(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst); ~Coo(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + 
const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); @@ -46,6 +49,7 @@ namespace ReSolve { namespace matrix { virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); - }; + }; -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index 843d4326d..f5d90d611 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -1,9 +1,10 @@ -#include // <-- includes memcpy -#include +#include "Csc.hpp" + #include +#include // <-- includes memcpy +#include #include -#include "Csc.hpp" namespace ReSolve { @@ -14,7 +15,8 @@ namespace ReSolve sparse_format_ = COMPRESSED_SPARSE_COLUMN; } - matrix::Csc::Csc(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Csc::Csc(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = COMPRESSED_SPARSE_COLUMN; } @@ -22,8 +24,9 @@ namespace ReSolve matrix::Csc::Csc(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = COMPRESSED_SPARSE_COLUMN; } @@ -36,13 +39,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -50,13 +54,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case 
DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -64,100 +69,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Csc::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Csc::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { index_type nnz_current = nnz_; - //four cases (for now) + // four cases (for now) int control = -1; setNotUpdated(); - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST) ) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) { control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST) ) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) { control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) + { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) + { + control = 3; + } - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csc::copyDataFrom one of host row or column data is null!\n"); + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csc::copyDataFrom one of host row or column data is null!\n"); - if ((h_col_data_ 
== nullptr) && (h_row_data_ == nullptr)) { - this->h_col_data_ = new index_type[m_ + 1]; - this->h_row_data_ = new index_type[nnz_current]; + if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) + { + this->h_col_data_ = new index_type[m_ + 1]; + this->h_row_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csc::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csc::copyDataFrom one of device row or column data is null!\n"); - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { + if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ == nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_col_data_, col_data, m_ + 1); - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_col_data_, col_data, m_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_col_data_, col_data, m_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, row_data, 
nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, m_ + 1); - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_col_data_, col_data, m_ + 1); + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_col_data_, col_data, m_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_col_data_, col_data, m_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, m_ + 1); + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; - } - int matrix::Csc::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Csc::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -169,9 +191,10 @@ namespace ReSolve int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + 
destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_col_data_ = new index_type[m_ + 1]; std::fill(h_col_data_, h_col_data_ + m_ + 1, 0); this->h_row_data_ = new index_type[nnz_current]; @@ -179,16 +202,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; } return -1; @@ -209,65 +233,72 @@ namespace ReSolve { using namespace ReSolve::memory; - switch(memspace) { - case HOST: - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csc::syncData one of host row or column data is null!\n"); + switch (memspace) + { + case HOST: + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csc::syncData one of host row or column data is null!\n"); - if (h_data_updated_) { - out::error() << "Csc::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Csc::syncData is trying to sync host with device, but device is out of date!\n" - << "See Csc::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) { - h_col_data_ = new index_type[m_ + 1]; - h_row_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_col_data_, 
d_col_data_, m_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csc::syncData one of device row or column data is null!\n"); + if (h_data_updated_) + { + out::error() << "Csc::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + out::error() << "Csc::syncData is trying to sync host with device, but device is out of date!\n" + << "See Csc::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) + { + h_col_data_ = new index_type[m_ + 1]; + h_row_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csc::syncData one of device row or column data is null!\n"); - if (d_data_updated_) { - out::error() << "Csc::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Csc::syncData is trying to sync device with host, but host is out of date!\n" - << "See Csc::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); 
- owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + if (d_data_updated_) + { + out::error() << "Csc::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); return 1; + } + if (!h_data_updated_) + { + out::error() << "Csc::syncData is trying to sync device with host, but host is out of date!\n" + << "See Csc::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } @@ -279,10 +310,12 @@ namespace ReSolve void matrix::Csc::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(index_type i = 0; i < m_; ++i) { - for (index_type j = h_col_data_[i]; j < h_col_data_[i+1]; ++j) { + for (index_type i = 0; i < m_; ++i) + { + for (index_type j = h_col_data_[i]; j < h_col_data_[i + 1]; ++j) + { out << h_row_data_[j] + indexing_base << " " - << i + indexing_base << " " + << i + indexing_base << " " << h_val_data_[j] << "\n"; } } diff --git a/resolve/matrix/Csc.hpp b/resolve/matrix/Csc.hpp index 9cf8f02fb..91708dba7 100644 --- a/resolve/matrix/Csc.hpp +++ b/resolve/matrix/Csc.hpp @@ -1,41 +1,45 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace 
matrix { - - class Csc : public Sparse +namespace ReSolve +{ + namespace matrix { + + class Csc : public Sparse + { public: Csc(); Csc(index_type n, index_type m, index_type nnz); - Csc(index_type n, - index_type m, + Csc(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded); + bool symmetric, + bool expanded); ~Csc(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + memory::MemorySpace memspaceOut); + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); + memory::MemorySpace memspaceOut); virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); - }; + }; -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 08aa6d49f..122bec6fb 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -1,9 +1,10 @@ -#include // <-- includes memcpy +#include "Csr.hpp" + #include #include +#include // <-- includes memcpy #include -#include "Csr.hpp" #include "Coo.hpp" #include @@ -16,7 +17,8 @@ namespace ReSolve sparse_format_ = 
COMPRESSED_SPARSE_ROW; } - matrix::Csr::Csr(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Csr::Csr(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = COMPRESSED_SPARSE_ROW; } @@ -24,8 +26,9 @@ namespace ReSolve matrix::Csr::Csr(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = COMPRESSED_SPARSE_ROW; } @@ -44,14 +47,14 @@ namespace ReSolve * @param[in] memspaceSrc * @param[in] memspaceDst */ - matrix::Csr::Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + matrix::Csr::Csr(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst) : Sparse(n, m, nnz, symmetric, expanded) @@ -59,94 +62,108 @@ namespace ReSolve sparse_format_ = COMPRESSED_SPARSE_ROW; int control = -1; - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) { control = 0;} - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)){ control = 1;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) { control = 2;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)){ control = 3;} + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) + { + control = 0; + } + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)) + { + control = 1; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) + { + control = 2; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)) + { + control = 3; + } switch (control) { - case 0: // cpu->cpu - // Set host data - h_row_data_ = *rows; - h_col_data_ = *cols; - 
h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - // Set device data to null - if (d_row_data_ || d_col_data_ || d_val_data_) { - out::error() << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - d_data_updated_ = false; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - // Set device data and copy it to host - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - // Set host data and copy it to device - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - syncData(memspaceDst); - - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - // Set device data - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - // Set host data to null - if (h_row_data_ || h_col_data_ || h_val_data_) { - out::error() << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - h_data_updated_ = false; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - out::error() << "Csr constructor failed! 
" - << "Possible bug in memory spaces setting.\n"; - break; + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + // Set device data to null + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + d_data_updated_ = false; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + syncData(memspaceDst); + + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + // Set host data to null + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Host data unexpectedly allocated. 
" + << "Possible bug in matrix::Sparse class.\n"; + } + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + h_data_updated_ = false; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + out::error() << "Csr constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; } } @@ -158,13 +175,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -172,13 +190,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -186,100 +205,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Csr::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Csr::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - //four cases (for now) + // four cases (for now) index_type nnz_current = nnz_; setNotUpdated(); int control = -1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 
0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csr::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - this->h_row_data_ = new index_type[n_ + 1]; - this->h_col_data_ = new index_type[nnz_current]; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) + { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csr::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new index_type[n_ + 1]; + this->h_col_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csr::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csr::copyDataFrom one of device row or 
column data is null!\n"); - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); owns_gpu_values_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ == nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } } - - //copy - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + // copy + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + 
h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } - int matrix::Csr::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Csr::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -291,9 +327,10 @@ namespace ReSolve int matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_row_data_ = new index_type[n_ + 1]; std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -301,16 +338,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; 
} return -1; @@ -331,70 +369,76 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - //check if we need to copy or not - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csr::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) { - out::error() << "Csr::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Csr::syncData is trying to sync host with device, but device is out of date!\n" - << "See Csr::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[n_ + 1]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csr::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) { - out::error() << "Csr::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Csr::syncData is trying to sync device with host, but host is out of date!\n" - << "See Csr::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - 
mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + switch (memspace) + { + case HOST: + // check if we need to copy or not + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csr::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + out::error() << "Csr::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); return 1; + } + if (!d_data_updated_) + { + out::error() << "Csr::syncData is trying to sync host with device, but device is out of date!\n" + << "See Csr::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new index_type[n_ + 1]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csr::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + out::error() << "Csr::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); + return 1; + } + if (!h_data_updated_) + { + out::error() << "Csr::syncData is trying to sync device with host, but host is out of date!\n" + << "See Csr::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + 
owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } - /** * @brief Prints matrix data. * @@ -403,13 +447,14 @@ namespace ReSolve void matrix::Csr::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(index_type i = 0; i < n_; ++i) { - for (index_type j = h_row_data_[i]; j < h_row_data_[i+1]; ++j) { - out << i + indexing_base << " " + for (index_type i = 0; i < n_; ++i) + { + for (index_type j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) + { + out << i + indexing_base << " " << h_col_data_[j] + indexing_base << " " << h_val_data_[j] << "\n"; } } } } // namespace ReSolve - diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index dd11d57f1..3451b08ae 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -1,59 +1,62 @@ #pragma once #include -namespace ReSolve { namespace matrix { +namespace ReSolve +{ + namespace matrix + { - // Forward declaration of Coo - class Coo; + // Forward declaration of Coo + class Coo; - class Csr : public Sparse - { + class Csr : public Sparse + { public: Csr(); Csr(index_type n, index_type m, index_type nnz); - Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); - - Csr(index_type n, - index_type m, + Csr(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + bool symmetric, + bool expanded); + + Csr(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, 
memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst); - + ~Csr(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(memory::MemorySpace memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); + }; - }; - -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 552548d65..1d570af85 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -1,14 +1,16 @@ +#include "MatrixHandler.hpp" + #include #include -#include -#include + +#include "MatrixHandlerCpu.hpp" +#include "MatrixHandlerImpl.hpp" #include #include #include +#include +#include #include -#include "MatrixHandler.hpp" -#include "MatrixHandlerCpu.hpp" -#include "MatrixHandlerImpl.hpp" #ifdef RESOLVE_USE_CUDA #include "MatrixHandlerCuda.hpp" @@ -17,7 +19,8 @@ #include "MatrixHandlerHip.hpp" #endif -namespace ReSolve { +namespace ReSolve +{ // Create a 
shortcut name for Logger static class using out = io::Logger; @@ -43,7 +46,8 @@ namespace ReSolve { MatrixHandler::~MatrixHandler() { delete cpuImpl_; - if (isCudaEnabled_ || isHipEnabled_) { + if (isCudaEnabled_ || isHipEnabled_) + { delete devImpl_; } } @@ -56,7 +60,7 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceCpu* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(new_workspace); isCpuEnabled_ = true; isCudaEnabled_ = false; } @@ -72,8 +76,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - devImpl_ = new MatrixHandlerCuda(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerCuda(new_workspace); isCpuEnabled_ = true; isCudaEnabled_ = true; } @@ -90,8 +94,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - devImpl_ = new MatrixHandlerHip(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerHip(new_workspace); isCpuEnabled_ = true; isHipEnabled_ = true; } @@ -111,13 +115,14 @@ namespace ReSolve { void MatrixHandler::setValuesChanged(bool isValuesChanged, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - cpuImpl_->setValuesChanged(isValuesChanged); - break; - case DEVICE: - devImpl_->setValuesChanged(isValuesChanged); - break; + switch (memspace) + { + case HOST: + cpuImpl_->setValuesChanged(isValuesChanged); + break; + case DEVICE: + devImpl_->setValuesChanged(isValuesChanged); + break; } } @@ -134,21 +139,22 @@ namespace ReSolve { * * @return 0 if successful, 1 otherwise */ - int MatrixHandler::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, + int MatrixHandler::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + 
const real_type* alpha, + const real_type* beta, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta); - break; - case DEVICE: - return devImpl_->matvec(A, vec_x, vec_result, alpha, beta); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta); + break; + case DEVICE: + return devImpl_->matvec(A, vec_x, vec_result, alpha, beta); + break; } return 1; } @@ -162,16 +168,17 @@ namespace ReSolve { * * @return 0 if successful, 1 otherwise */ - int MatrixHandler::matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace) + int MatrixHandler::matrixInfNorm(matrix::Sparse* A, real_type* norm, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->matrixInfNorm(A, norm); - break; - case DEVICE: - return devImpl_->matrixInfNorm(A, norm); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->matrixInfNorm(A, norm); + break; + case DEVICE: + return devImpl_->matrixInfNorm(A, norm); + break; } return 1; } @@ -195,13 +202,14 @@ namespace ReSolve { int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->csc2csr(A_csc, A_csr); - break; - case DEVICE: - return devImpl_->csc2csr(A_csc, A_csr); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->csc2csr(A_csc, A_csr); + break; + case DEVICE: + return devImpl_->csc2csr(A_csc, A_csr); + break; } return 1; } @@ -222,33 +230,36 @@ namespace ReSolve { assert(A->getNumRows() == At->getNumColumns() && "Number of rows in A must be equal to number of columns in At"); assert(A->getNumColumns() == At->getNumRows() && "Number of columns in A must be equal to number of rows in At"); assert(A->getNnz() == At->getNnz() && "Number of nonzeros in A must be 
equal to number of nonzeros in At"); - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for transpose.\n"); - assert(At->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for transpose.\n"); - switch (memspace) { - case HOST: - if(A->getValues(memory::HOST) == nullptr) { - out::error() << "In MatrixHandler::transpose, A->getValues(memory::HOST) is null!\n"; - return 1; - } - if(At->getValues(memory::HOST) == nullptr) { - out::error() << "In MatrixHandler::transpose, At->getValues(memory::HOST) is null!\n"; - return 1; - } - return cpuImpl_->transpose(A, At); - break; - case DEVICE: - if(A->getValues(memory::DEVICE) == nullptr) { - out::error() << "In MatrixHandlerCuda::transpose, A->getValues(memory::DEVICE) is null!\n"; - return 1; - } - if(At->getValues(memory::DEVICE) == nullptr) { - out::error() << "In MatrixHandlerCuda::transpose, At->getValues(memory::DEVICE) is null!\n"; - return 1; - } - return devImpl_->transpose(A, At); - break; + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for transpose.\n"); + assert(At->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for transpose.\n"); + switch (memspace) + { + case HOST: + if (A->getValues(memory::HOST) == nullptr) + { + out::error() << "In MatrixHandler::transpose, A->getValues(memory::HOST) is null!\n"; + return 1; + } + if (At->getValues(memory::HOST) == nullptr) + { + out::error() << "In MatrixHandler::transpose, At->getValues(memory::HOST) is null!\n"; + return 1; + } + return cpuImpl_->transpose(A, At); + break; + case DEVICE: + if (A->getValues(memory::DEVICE) == nullptr) + { + out::error() << "In MatrixHandlerCuda::transpose, A->getValues(memory::DEVICE) is null!\n"; + return 1; + } + if (At->getValues(memory::DEVICE) == nullptr) + { + out::error() << "In MatrixHandlerCuda::transpose, 
At->getValues(memory::DEVICE) is null!\n"; + return 1; + } + return devImpl_->transpose(A, At); + break; } return 1; } @@ -269,20 +280,19 @@ namespace ReSolve { */ int MatrixHandler::leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for left diagonal scaling.\n"); - assert(diag->getSize() == A->getNumRows() && - "Diagonal vector must be of the same size as the number of rows in the matrix."); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for left diagonal scaling.\n"); + assert(diag->getSize() == A->getNumRows() && "Diagonal vector must be of the same size as the number of rows in the matrix."); assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->leftScale(diag, A); - break; - case DEVICE: - return devImpl_->leftScale(diag, A); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->leftScale(diag, A); + break; + case DEVICE: + return devImpl_->leftScale(diag, A); + break; } return 1; } @@ -303,19 +313,19 @@ namespace ReSolve { */ int MatrixHandler::rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for right diagonal scaling.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for right diagonal scaling.\n"); assert(diag->getSize() == A->getNumColumns() && "Diagonal vector must be of the same size as the number of columns in the matrix."); assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is 
null!\n"); using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->rightScale(A, diag); - break; - case DEVICE: - return devImpl_->rightScale(A, diag); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->rightScale(A, diag); + break; + case DEVICE: + return devImpl_->rightScale(A, diag); + break; } return 1; } @@ -330,13 +340,14 @@ namespace ReSolve { void MatrixHandler::addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - cpuImpl_->addConst(A, alpha); - break; - case DEVICE: - devImpl_->addConst(A, alpha); - break; + switch (memspace) + { + case HOST: + cpuImpl_->addConst(A, alpha); + break; + case DEVICE: + devImpl_->addConst(A, alpha); + break; } } diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 219badd11..6b6785aba 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -2,28 +2,28 @@ #include #include - namespace ReSolve { namespace vector { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCpu; class LinAlgWorkspaceCUDA; class LinAlgWorkspaceHIP; class MatrixHandlerImpl; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @brief this class encapsulates various matrix manipulation operations, @@ -45,46 +45,45 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandler(); - MatrixHandler(LinAlgWorkspaceCpu* workspace); - MatrixHandler(LinAlgWorkspaceCUDA* workspace); - MatrixHandler(LinAlgWorkspaceHIP* workspace); - ~MatrixHandler(); + public: + MatrixHandler(); + MatrixHandler(LinAlgWorkspaceCpu* workspace); + MatrixHandler(LinAlgWorkspaceCUDA* workspace); + MatrixHandler(LinAlgWorkspaceHIP* workspace); + ~MatrixHandler(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace); + int 
csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace); - int transpose(matrix::Csr* A, matrix::Csr* At, memory::MemorySpace memspace); + int transpose(matrix::Csr* A, matrix::Csr* At, memory::MemorySpace memspace); - int leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace); + int leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace); - int rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace); + int rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace); - void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); + void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped - int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - memory::MemorySpace memspace); - int matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace); - void setValuesChanged(bool toWhat, memory::MemorySpace memspace); + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + memory::MemorySpace memspace); + int matrixInfNorm(matrix::Sparse* A, real_type* norm, memory::MemorySpace memspace); + void setValuesChanged(bool toWhat, memory::MemorySpace memspace); - bool getIsCudaEnabled() const; - bool getIsHipEnabled() const; + bool getIsCudaEnabled() const; + bool getIsHipEnabled() const; - private: - bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + private: + bool new_matrix_{true}; ///< if the structure changed, you need a new handler. 
- MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to host implementation - MatrixHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation + MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to host implementation + MatrixHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation - bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated - bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated - bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated + bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated + bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated + bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index b70a178ce..28d726e2b 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -1,29 +1,33 @@ +#include "MatrixHandlerCpu.hpp" + #include #include -#include -#include #include #include #include -#include "MatrixHandlerCpu.hpp" +#include +#include namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; + /** * @brief Empty constructor for MatrixHandlerCpu class. */ MatrixHandlerCpu::MatrixHandlerCpu() { } + /** * @brief Empty destructor for MatrixHandlerCpu class. */ MatrixHandlerCpu::~MatrixHandlerCpu() { } + /** * @brief Constructor for MatrixHandlerCpu class. * @param[in] new_workspace - pointer to LinAlgWorkspaceCpu object @@ -32,6 +36,7 @@ namespace ReSolve { workspace_ = new_workspace; } + /** * @brief Marks when values have changed in MatrixHandlerCpu class. 
* @param[in] values_changed - boolean value indicating if values have changed @@ -41,7 +46,6 @@ namespace ReSolve values_changed_ = values_changed; } - /** * @brief result := alpha * A * x + beta * result * @@ -59,41 +63,42 @@ namespace ReSolve * statement to select implementation for recognized input matrix * format. */ - int MatrixHandlerCpu::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerCpu::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); index_type* ia = A->getRowData(memory::HOST); index_type* ja = A->getColData(memory::HOST); - real_type* a = A->getValues( memory::HOST); + real_type* a = A->getValues(memory::HOST); real_type* x_data = vec_x->getData(memory::HOST); real_type* result_data = vec_result->getData(memory::HOST); - real_type sum; - real_type y; - real_type t; - real_type c; + real_type sum; + real_type y; + real_type t; + real_type c; // Kahan algorithm for stability - for (int i = 0; i < A->getNumRows(); ++i) { + for (int i = 0; i < A->getNumRows(); ++i) + { sum = 0.0; - c = 0.0; - for (int j = ia[i]; j < ia[i+1]; ++j) { - y = ( a[j] * x_data[ja[j]]) - c; - t = sum + y; - c = (t - sum) - y; + c = 0.0; + for (int j = ia[i]; j < ia[i + 1]; ++j) + { + y = (a[j] * x_data[ja[j]]) - c; + t = sum + y; + c = (t - sum) - y; sum = t; // sum += (a[j] * x_data[ja[j]]); } - sum *= (*alpha); - result_data[i] = result_data[i]*(*beta) + sum; + sum *= (*alpha); + result_data[i] = result_data[i] * (*beta) + sum; } vec_result->setDataUpdated(memory::HOST); return 0; @@ -116,18 +121,20 @@ namespace ReSolve int 
MatrixHandlerCpu::matrixInfNorm(matrix::Sparse* A, real_type* norm) { using memory::HOST; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); real_type sum = 0.0; real_type nrm = 0.0; - for (index_type i = 0; i < A->getNumRows(); ++i) { + for (index_type i = 0; i < A->getNumRows(); ++i) + { sum = 0.0; - for (index_type j = A->getRowData(HOST)[i]; j < A->getRowData(HOST)[i+1]; ++j) { + for (index_type j = A->getRowData(HOST)[i]; j < A->getRowData(HOST)[i + 1]; ++j) + { sum += std::abs(A->getValues(HOST)[j]); } - if (i == 0 || sum > nrm) { + if (i == 0 || sum > nrm) + { nrm = sum; } } @@ -157,32 +164,36 @@ namespace ReSolve index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); index_type* colPtrCsc = A_csc->getColData(memory::HOST); - real_type* valuesCsc = A_csc->getValues( memory::HOST); + real_type* valuesCsc = A_csc->getValues(memory::HOST); index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); index_type* colIdxCsr = A_csr->getColData(memory::HOST); - real_type* valuesCsr = A_csr->getValues( memory::HOST); + real_type* valuesCsr = A_csr->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= n; ++i) { + for (index_type i = 0; i <= n; ++i) + { rowPtrCsr[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxCsr[i] = 0; valuesCsr[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrCsr[rowIdxCsc[i]]++; } // Compute cumualtive sum of nnz per row - for (index_type row = 0, rowsum = 0; row < n; ++row) { + for (index_type row = 0, rowsum = 0; row < n; ++row) + { // Store value in row pointer to temp - index_type temp = 
rowPtrCsr[row]; + index_type temp = rowPtrCsr[row]; // Copy cumulative sum to the row pointer rowPtrCsr[row] = rowsum; @@ -192,10 +203,12 @@ namespace ReSolve } rowPtrCsr[n] = nnz; - for (index_type col = 0; col < m; ++col) { + for (index_type col = 0; col < m; ++col) + { // Compute positions of column indices and values in CSR matrix and store them there // Overwrites CSR row pointers in the process - for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) { + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; jj++) + { index_type row = rowIdxCsc[jj]; index_type dest = rowPtrCsr[row]; @@ -207,10 +220,11 @@ namespace ReSolve } // Restore CSR row pointer values - for (index_type row = 0, last = 0; row <= n; row++) { - index_type temp = rowPtrCsr[row]; - rowPtrCsr[row] = last; - last = temp; + for (index_type row = 0, last = 0; row <= n; row++) + { + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; } // Values on the host are updated now -- mark them as such! 
@@ -231,33 +245,37 @@ namespace ReSolve { assert(A->getValues(memory::HOST) != nullptr && "Matrix A is not allocated on host.\n"); assert(At->getValues(memory::HOST) != nullptr && "Matrix At is not allocated on host.\n"); - index_type n = A->getNumRows(); - index_type m = A->getNumColumns(); - index_type nnz = A->getNnz(); - index_type* rowPtrA = A->getRowData(memory::HOST); - index_type* colIdxA = A->getColData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); + index_type n = A->getNumRows(); + index_type m = A->getNumColumns(); + index_type nnz = A->getNnz(); + index_type* rowPtrA = A->getRowData(memory::HOST); + index_type* colIdxA = A->getColData(memory::HOST); + real_type* valuesA = A->getValues(memory::HOST); index_type* rowPtrAt = At->getRowData(memory::HOST); index_type* colIdxAt = At->getColData(memory::HOST); - real_type* valuesAt = At->getValues( memory::HOST); + real_type* valuesAt = At->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= m; ++i) { + for (index_type i = 0; i <= m; ++i) + { rowPtrAt[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxAt[i] = 0; valuesAt[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrAt[colIdxA[i]]++; } // Compute cumualtive sum of nnz per row - for (index_type row = 0, rowsum = 0; row < m; ++row) { + for (index_type row = 0, rowsum = 0; row < m; ++row) + { // Store value in row pointer to temp - index_type temp = rowPtrAt[row]; + index_type temp = rowPtrAt[row]; // Copy cumulative sum to the row pointer rowPtrAt[row] = rowsum; @@ -266,10 +284,12 @@ namespace ReSolve rowsum += temp; } rowPtrAt[m] = nnz; - for (index_type col = 0; col < n; ++col) { + for (index_type col = 0; col < n; ++col) + { // Compute positions of column indices and values in CSR matrix and store them there // 
Overwrites CSR row pointers in the process - for (index_type jj = rowPtrA[col]; jj < rowPtrA[col+1]; jj++) { + for (index_type jj = rowPtrA[col]; jj < rowPtrA[col + 1]; jj++) + { index_type row = colIdxA[jj]; index_type dest = rowPtrAt[row]; @@ -280,10 +300,11 @@ namespace ReSolve } } // Restore CSR row pointer values - for (index_type row = 0, last = 0; row <= m; row++) { - index_type temp = rowPtrAt[row]; - rowPtrAt[row] = last; - last = temp; + for (index_type row = 0, last = 0; row <= m; row++) + { + index_type temp = rowPtrAt[row]; + rowPtrAt[row] = last; + last = temp; } // Values on the host are updated now -- mark them as such! At->setUpdated(memory::HOST); @@ -306,12 +327,14 @@ namespace ReSolve */ int MatrixHandlerCpu::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::HOST); - index_type* rowPtrA = A->getRowData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); - - for (index_type i = 0; i < A->getNumRows(); ++i) { - for (index_type j = rowPtrA[i]; j < rowPtrA[i+1]; ++j) { + real_type* diag_data = diag->getData(memory::HOST); + index_type* rowPtrA = A->getRowData(memory::HOST); + real_type* valuesA = A->getValues(memory::HOST); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + for (index_type j = rowPtrA[i]; j < rowPtrA[i + 1]; ++j) + { valuesA[j] *= diag_data[i]; } } @@ -333,13 +356,15 @@ namespace ReSolve */ int MatrixHandlerCpu::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::HOST); - index_type* rowPtrA = A->getRowData(memory::HOST); - index_type* colIdxA = A->getColData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); - - for (index_type i = 0; i < A->getNumRows(); ++i) { - for (index_type j = rowPtrA[i]; j < rowPtrA[i+1]; ++j) { + real_type* diag_data = diag->getData(memory::HOST); + index_type* rowPtrA = A->getRowData(memory::HOST); + index_type* colIdxA = A->getColData(memory::HOST); + real_type* valuesA = 
A->getValues(memory::HOST); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + for (index_type j = rowPtrA[i]; j < rowPtrA[i + 1]; ++j) + { valuesA[j] *= diag_data[colIdxA[j]]; } } @@ -357,8 +382,9 @@ namespace ReSolve int MatrixHandlerCpu::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::HOST); - index_type nnz = A->getNnz(); - for (index_type i = 0; i < nnz; ++i) { + index_type nnz = A->getNnz(); + for (index_type i = 0; i < nnz; ++i) + { values[i] += alpha; } return 0; diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index e028cb84b..66b810787 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCpu; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerCpu * @@ -30,37 +31,36 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerCpu(); - MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); - virtual ~MatrixHandlerCpu(); + public: + MatrixHandlerCpu(); + MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); + virtual ~MatrixHandlerCpu(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int transpose(matrix::Csr* A, matrix::Csr* At) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int addConst(matrix::Sparse* A, real_type alpha) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - 
vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse *A, real_type* norm) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - void setValuesChanged(bool isValuesChanged) override; + void setValuesChanged(bool isValuesChanged) override; - private: - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec + private: + LinAlgWorkspaceCpu* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec - // MemoryHandler mem_; ///< Device memory manager object not used for now + // MemoryHandler mem_; ///< Device memory manager object not used for now }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index a856fad13..f88e9aaac 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -1,17 +1,20 @@ +#include "MatrixHandlerCuda.hpp" + #include -#include -#include +#include +#include + +#include // needed for inf nrm #include #include #include +#include +#include #include -#include "MatrixHandlerCuda.hpp" -#include -#include -#include // needed for inf nrm -namespace ReSolve { +namespace ReSolve +{ // Create a shortcut name for Logger static class using out = io::Logger; @@ -59,51 +62,51 @@ namespace ReSolve { * statement to select implementation for recognized input matrix * format. 
*/ - int MatrixHandlerCuda::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerCuda::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - int error_sum = 0; - //result = alpha *A*x + beta * result - cusparseStatus_t status; + int error_sum = 0; + // result = alpha *A*x + beta * result + cusparseStatus_t status; cusparseDnVecDescr_t vecx = workspace_->getVecX(); cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); - cusparseDnVecDescr_t vecAx = workspace_->getVecY(); cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); cusparseSpMatDescr_t matA = workspace_->getSpmvMatrixDescriptor(); - void* buffer_spmv = workspace_->getSpmvBuffer(); + void* buffer_spmv = workspace_->getSpmvBuffer(); cusparseHandle_t handle_cusparse = workspace_->getCusparseHandle(); - if (values_changed_) { - status = cusparseCreateCsr(&matA, + if (values_changed_) + { + status = cusparseCreateCsr(&matA, A->getNumRows(), A->getNumColumns(), A->getNnz(), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - error_sum += status; - values_changed_ = false; + error_sum += status; + values_changed_ = false; } - if (!workspace_->matvecSetup()) { - //setup first, allocate, etc. + if (!workspace_->matvecSetup()) + { + // setup first, allocate, etc. 
size_t bufferSize = 0; - status = cusparseSpMV_bufferSize(handle_cusparse, + status = cusparseSpMV_bufferSize(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, &MINUS_ONE, matA, @@ -122,7 +125,7 @@ namespace ReSolve { workspace_->matvecSetupDone(); } - status = cusparseSpMV(handle_cusparse, + status = cusparseSpMV(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, matA, @@ -135,7 +138,7 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); if (status) - out::error() << "Matvec status: " << status << ". " + out::error() << "Matvec status: " << status << ". " << "Last error code: " << mem_.getLastDeviceError() << ".\n"; vec_result->setDataUpdated(memory::DEVICE); @@ -160,10 +163,10 @@ namespace ReSolve { */ int MatrixHandlerCuda::matrixInfNorm(matrix::Sparse* A, real_type* norm) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - if (workspace_->getNormBufferState() == false) { // not allocated + if (workspace_->getNormBufferState() == false) + { // not allocated real_type* buffer; mem_.allocateArrayOnDevice(&buffer, 1024); workspace_->setNormBuffer(buffer); @@ -171,8 +174,10 @@ namespace ReSolve { } real_type* d_r = workspace_->getDr(); - if (workspace_->getDrSize() != A->getNumRows()) { - if (d_r != nullptr) { + if (workspace_->getDrSize() != A->getNumRows()) + { + if (d_r != nullptr) + { mem_.deleteOnDevice(d_r); } mem_.allocateArrayOnDevice(&d_r, A->getNumRows()); @@ -190,43 +195,45 @@ namespace ReSolve { A->getNumRows(), d_r, norm, - workspace_->getNormBuffer() /* at least 8192 bytes */); + workspace_->getNormBuffer() /* at least 8192 bytes */); - if (status != 0) { - io::Logger::warning() << "Vector inf nrm returned "<allocateMatrixData(memory::DEVICE); - index_type m = A_csc->getNumColumns(); - index_type n = 
A_csc->getNumRows(); + index_type m = A_csc->getNumColumns(); + index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); // check dimensions of A_csc and A_csr assert(A_csc->getNumRows() == A_csr->getNumRows() && "Number of rows in A_csc must be equal to number of rows in A_csr"); assert(A_csc->getNumColumns() == A_csr->getNumColumns() && "Number of columns in A_csc must be equal to number of columns in A_csr"); - size_t bufferSize; - void* d_work; - cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), + size_t bufferSize; + void* d_work; + cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), A_csr->getColData(memory::DEVICE), CUDA_R_64F, @@ -234,16 +241,16 @@ namespace ReSolve { CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, &bufferSize); - error_sum += status; + error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), A_csr->getColData(memory::DEVICE), CUDA_R_64F, @@ -252,8 +259,9 @@ namespace ReSolve { CUSPARSE_CSR2CSC_ALG1, d_work); error_sum += status; - if (status) { - out::error() << "CSC2CSR status: " << status << ". " + if (status) + { + out::error() << "CSC2CSR status: " << status << ". 
" << "Last error code: " << mem_.getLastDeviceError() << ".\n"; } mem_.deleteOnDevice(d_work); @@ -276,24 +284,25 @@ namespace ReSolve { */ int MatrixHandlerCuda::transpose(matrix::Csr* A, matrix::Csr* At) { - index_type error_sum = 0; - index_type m = A->getNumRows(); - index_type n = A->getNumColumns(); - index_type nnz = A->getNnz(); + index_type error_sum = 0; + index_type m = A->getNumRows(); + index_type n = A->getNumColumns(); + index_type nnz = A->getNnz(); cusparseStatus_t status; - bool allocated = workspace_->isTransposeBufferAllocated(); + bool allocated = workspace_->isTransposeBufferAllocated(); // check dimensions of A and At - if (!allocated) { + if (!allocated) + { // allocate transpose buffer size_t bufferSize; - status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getRowData(memory::DEVICE), At->getColData(memory::DEVICE), CUDA_R_64F, @@ -304,14 +313,14 @@ namespace ReSolve { error_sum += status; workspace_->setTransposeBufferWorkspace(bufferSize); } - status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getRowData(memory::DEVICE), At->getColData(memory::DEVICE), CUDA_R_64F, @@ -341,10 +350,10 @@ namespace ReSolve { */ int MatrixHandlerCuda::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); - real_type* a_vals = 
A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); // check values in A and diag cuda::leftScale(n, a_row_ptr, a_vals, diag_data); A->setUpdated(memory::DEVICE); @@ -366,11 +375,11 @@ namespace ReSolve { */ int MatrixHandlerCuda::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); index_type* a_col_idx = A->getColData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); cuda::rightScale(n, a_row_ptr, a_col_idx, a_vals, diag_data); A->setUpdated(memory::DEVICE); return 0; @@ -387,7 +396,7 @@ namespace ReSolve { int MatrixHandlerCuda::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::DEVICE); - index_type nnz = A->getNnz(); + index_type nnz = A->getNnz(); cuda::addConst(nnz, alpha, values); return 0; } diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 957dcf70d..f0efbcee8 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCUDA; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerCuda * @@ -30,37 +31,34 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); - virtual ~MatrixHandlerCuda(); - - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + public: + MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); + virtual ~MatrixHandlerCuda(); - int 
transpose(matrix::Csr* A, matrix::Csr* At) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int addConst(matrix::Sparse* A, real_type alpha) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - void setValuesChanged(bool isValuesChanged) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - private: - LinAlgWorkspaceCUDA* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - MemoryHandler mem_; ///< Device memory manager object + void setValuesChanged(bool isValuesChanged) override; + private: + LinAlgWorkspaceCUDA* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 3fbda0d14..ae97be1d4 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,16 +1,19 @@ +#include "MatrixHandlerHip.hpp" + #include -#include -#include +#include +#include + #include #include #include +#include +#include #include -#include -#include -#include "MatrixHandlerHip.hpp" -namespace ReSolve { +namespace ReSolve +{ // Create a shortcut name for Logger static class using out = io::Logger; @@ 
-41,7 +44,6 @@ namespace ReSolve { values_changed_ = values_changed; } - /** * @brief result := alpha * A * x + beta * result * @@ -59,41 +61,41 @@ namespace ReSolve { * statement to select implementation for recognized input matrix * format. */ - int MatrixHandlerHip::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerHip::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - int error_sum = 0; - //result = alpha *A*x + beta * result + int error_sum = 0; + // result = alpha *A*x + beta * result rocsparse_status status; rocsparse_handle handle_rocsparse = workspace_->getRocsparseHandle(); - rocsparse_mat_info infoA = workspace_->getSpmvMatrixInfo(); - rocsparse_mat_descr descrA = workspace_->getSpmvMatrixDescriptor(); + rocsparse_mat_info infoA = workspace_->getSpmvMatrixInfo(); + rocsparse_mat_descr descrA = workspace_->getSpmvMatrixDescriptor(); - if (!workspace_->matvecSetup()) { - //setup first, allocate, etc. + if (!workspace_->matvecSetup()) + { + // setup first, allocate, etc. 
rocsparse_create_mat_descr(&(descrA)); rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero); rocsparse_set_mat_type(descrA, rocsparse_matrix_type_general); rocsparse_create_mat_info(&infoA); - status = rocsparse_dcsrmv_analysis(handle_rocsparse, + status = rocsparse_dcsrmv_analysis(handle_rocsparse, rocsparse_operation_none, A->getNumRows(), A->getNumColumns(), A->getNnz(), descrA, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), infoA); @@ -112,7 +114,7 @@ namespace ReSolve { A->getNnz(), alpha, descrA, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), infoA, @@ -122,8 +124,9 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); - if (status) { - out::error() << "Matvec status: " << status << ". " + if (status) + { + out::error() << "Matvec status: " << status << ". " << "Last error code: " << mem_.getLastDeviceError() << ".\n"; } vec_result->setDataUpdated(memory::DEVICE); @@ -147,14 +150,15 @@ namespace ReSolve { */ int MatrixHandlerHip::matrixInfNorm(matrix::Sparse* A, real_type* norm) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - real_type* d_r = workspace_->getDr(); + real_type* d_r = workspace_->getDr(); index_type d_r_size = workspace_->getDrSize(); - if (d_r_size != A->getNumRows()) { - if (d_r_size != 0) { + if (d_r_size != A->getNumRows()) + { + if (d_r_size != 0) + { mem_.deleteOnDevice(d_r); } mem_.allocateArrayOnDevice(&d_r, A->getNumRows()); @@ -162,7 +166,8 @@ namespace ReSolve { workspace_->setDr(d_r); } - if (workspace_->getNormBufferState() == false) { // not allocated + if (workspace_->getNormBufferState() == false) + { // not 
allocated real_type* buffer; mem_.allocateArrayOnDevice(&buffer, 1024); workspace_->setNormBuffer(buffer); @@ -198,11 +203,11 @@ namespace ReSolve { rocsparse_status status; A_csr->allocateMatrixData(memory::DEVICE); - index_type m = A_csc->getNumColumns(); - index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumColumns(); + index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); - size_t bufferSize; - void* d_work; + size_t bufferSize; + void* d_work; status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), m, @@ -216,14 +221,14 @@ namespace ReSolve { error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), + status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getColData(memory::DEVICE), A_csr->getRowData(memory::DEVICE), rocsparse_action_numeric, @@ -252,16 +257,17 @@ namespace ReSolve { */ int MatrixHandlerHip::transpose(matrix::Csr* A, matrix::Csr* At) { - index_type error_sum = 0; - index_type m = A->getNumRows(); - index_type n = A->getNumColumns(); - index_type nnz = A->getNnz(); + index_type error_sum = 0; + index_type m = A->getNumRows(); + index_type n = A->getNumColumns(); + index_type nnz = A->getNnz(); rocsparse_status status; - bool allocated = workspace_->isTransposeBufferAllocated(); - if (!allocated) { + bool allocated = workspace_->isTransposeBufferAllocated(); + if (!allocated) + { // allocate transpose buffer size_t bufferSize; - status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), + status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), m, n, nnz, @@ -272,14 +278,14 @@ namespace ReSolve { error_sum += status; 
workspace_->setTransposeBufferWorkspace(bufferSize); } - status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), + status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getColData(memory::DEVICE), At->getRowData(memory::DEVICE), rocsparse_action_numeric, @@ -303,7 +309,7 @@ namespace ReSolve { int MatrixHandlerHip::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::DEVICE); - index_type nnz = A->getNnz(); + index_type nnz = A->getNnz(); hip::addConst(nnz, alpha, values); return 0; } @@ -323,10 +329,10 @@ namespace ReSolve { */ int MatrixHandlerHip::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); // check values in A and diag hip::leftScale(n, a_row_ptr, a_vals, diag_data); A->setUpdated(memory::DEVICE); @@ -348,11 +354,11 @@ namespace ReSolve { */ int MatrixHandlerHip::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); index_type* a_col_idx = A->getColData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); hip::rightScale(n, a_row_ptr, a_col_idx, a_vals, diag_data); A->setUpdated(memory::DEVICE); return 0; diff --git a/resolve/matrix/MatrixHandlerHip.hpp 
b/resolve/matrix/MatrixHandlerHip.hpp index e06dae945..7e69c3f1c 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceHIP; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerHip * @@ -30,37 +31,34 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - - MatrixHandlerHip(LinAlgWorkspaceHIP* workspace); - virtual ~MatrixHandlerHip(); + public: + MatrixHandlerHip(LinAlgWorkspaceHIP* workspace); + virtual ~MatrixHandlerHip(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int transpose(matrix::Csr* A, matrix::Csr* At) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; + int addConst(matrix::Sparse* A, real_type alpha) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse *A, real_type* norm) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - void setValuesChanged(bool isValuesChanged) override; + void setValuesChanged(bool isValuesChanged) override; - private: + private: + LinAlgWorkspaceHIP* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec - 
LinAlgWorkspaceHIP* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index bb5eca5b3..d874dff16 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -6,17 +6,18 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } -} - + } // namespace matrix +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerImpl * @@ -26,31 +27,33 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerImpl() - {} - virtual ~MatrixHandlerImpl() - {} + public: + MatrixHandlerImpl() + { + } - virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; + virtual ~MatrixHandlerImpl() + { + } - virtual int transpose(matrix::Csr* A, matrix::Csr* At) = 0; + virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; - virtual int leftScale(vector_type* diag, matrix::Csr* A) = 0; + virtual int transpose(matrix::Csr* A, matrix::Csr* At) = 0; - virtual int rightScale(matrix::Csr* A, vector_type* diag) = 0; + virtual int leftScale(vector_type* diag, matrix::Csr* A) = 0; - virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; + virtual int rightScale(matrix::Csr* A, vector_type* diag) = 0; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) = 0; - virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) = 0; + virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; - virtual void setValuesChanged(bool isValuesChanged) = 0; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) = 0; + 
virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) = 0; + + virtual void setValuesChanged(bool isValuesChanged) = 0; }; } // namespace ReSolve - diff --git a/resolve/matrix/Sparse.cpp b/resolve/matrix/Sparse.cpp index 58ff1b93c..edad7ca76 100644 --- a/resolve/matrix/Sparse.cpp +++ b/resolve/matrix/Sparse.cpp @@ -1,39 +1,41 @@ -#include // <-- includes memcpy +#include "Sparse.hpp" + +#include // <-- includes memcpy #include -#include "Sparse.hpp" -namespace ReSolve { +namespace ReSolve +{ using out = io::Logger; - /** - * @brief empty constructor that does absolutely nothing + /** + * @brief empty constructor that does absolutely nothing */ matrix::Sparse::Sparse() { } - /** + /** * @brief basic constructor. It DOES NOT allocate any memory! * * @param[in] n - number of rows * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros + * @param[in] nnz - number of non-zeros */ - matrix::Sparse::Sparse(index_type n, - index_type m, - index_type nnz): - n_{n}, - m_{m}, - nnz_{nnz} + matrix::Sparse::Sparse(index_type n, + index_type m, + index_type nnz) + : n_{n}, + m_{m}, + nnz_{nnz} { this->is_symmetric_ = false; - this->is_expanded_ = true; //default is a normal non-symmetric fully expanded matrix + this->is_expanded_ = true; // default is a normal non-symmetric fully expanded matrix setNotUpdated(); - //set everything to nullptr + // set everything to nullptr h_row_data_ = nullptr; h_col_data_ = nullptr; h_val_data_ = nullptr; @@ -41,37 +43,37 @@ namespace ReSolve { d_row_data_ = nullptr; d_col_data_ = nullptr; d_val_data_ = nullptr; - + owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - + owns_cpu_values_ = false; + owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; + owns_gpu_values_ = false; } - /** + /** * @brief another basic constructor. It DOES NOT allocate any memory! 
* * @param[in] n - number of rows * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros - * @param[in] symmetric - true if symmetric, false if non-symmetric - * @param[in] expanded - true if expanded, false if not + * @param[in] nnz - number of non-zeros + * @param[in] symmetric - true if symmetric, false if non-symmetric + * @param[in] expanded - true if expanded, false if not */ - matrix::Sparse::Sparse(index_type n, - index_type m, + matrix::Sparse::Sparse(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded): - n_{n}, - m_{m}, - nnz_{nnz}, - is_symmetric_{symmetric}, - is_expanded_{expanded} + bool symmetric, + bool expanded) + : n_{n}, + m_{m}, + nnz_{nnz}, + is_symmetric_{symmetric}, + is_expanded_{expanded} { setNotUpdated(); - //set everything to nullptr + // set everything to nullptr h_row_data_ = nullptr; h_col_data_ = nullptr; h_val_data_ = nullptr; @@ -81,12 +83,12 @@ namespace ReSolve { d_val_data_ = nullptr; owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - + owns_cpu_values_ = false; + owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; + owns_gpu_values_ = false; } - + /** * @brief destructor * */ @@ -96,15 +98,15 @@ namespace ReSolve { this->destroyMatrixData(memory::DEVICE); } - /** + /** * @brief set the matrix update flags to false (for both HOST and DEVICE). 
*/ void matrix::Sparse::setNotUpdated() { h_data_updated_ = false; - d_data_updated_ = false; + d_data_updated_ = false; } - + /** * @brief get number of matrix rows * @@ -163,8 +165,8 @@ namespace ReSolve { /** * @brief Set matrix symmetry property * - * @param[in] symmetric - true to set matrix to symmetric and false to set to non-symmetric - */ + * @param[in] symmetric - true to set matrix to symmetric and false to set to non-symmetric + */ void matrix::Sparse::setSymmetric(bool symmetric) { this->is_symmetric_ = symmetric; @@ -174,7 +176,7 @@ namespace ReSolve { * @brief Set matrix "expanded" property * * @param[in] expanded - true to set matrix to expanded and false to set to not expanded - */ + */ void matrix::Sparse::setExpanded(bool expanded) { this->is_expanded_ = expanded; @@ -184,7 +186,7 @@ namespace ReSolve { * @brief Set number of non-zeros. * * @param[in] nnz_new - new number of non-zeros - */ + */ void matrix::Sparse::setNnz(index_type nnz_new) { this->nnz_ = nnz_new; @@ -196,38 +198,39 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" * * @return 0 if successful, -1 if not. - * + * * The method sets the boolean flag indicating that the `memspace` is updated. * It automatically sets the other data mirror to non-updated. You would * use this function if you update matrix data by accessing its raw pointers. * In such case, the matrix has no way of knowing which data is most recent, so * you have to tell it. - * + * * @warning This is an expert-level function. Use only if you know what you are * doing. - * + * * @note If you want to set both DEVICE and HOST memory to the same value * use syncData function. 
- */ + */ int matrix::Sparse::setUpdated(memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - h_data_updated_ = true; - d_data_updated_ = false; - break; - case DEVICE: - d_data_updated_ = true; - h_data_updated_ = false; - break; + switch (memspace) + { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: + d_data_updated_ = true; + h_data_updated_ = false; + break; } return 0; } /** * @brief Set the pointers for matrix row, column, value data. - * + * * Useful if interfacing with other codes - this function only assigns * pointers, but it does not allocate nor copy anything. The data ownership * flags would be set to false (default). @@ -238,102 +241,112 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) of incoming data * * @return 0 if successful, 1 if not. - */ - int matrix::Sparse::setDataPointers(index_type* row_data, - index_type* col_data, - real_type* val_data, + */ + int matrix::Sparse::setDataPointers(index_type* row_data, + index_type* col_data, + real_type* val_data, memory::MemorySpace memspace) { using namespace ReSolve::memory; setNotUpdated(); - switch (memspace) { - case HOST: - if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) { - out::error() << "Trying to set matrix host data, but the data already set!\n"; - out::error() << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_cpu_values_ && h_val_data_) { - out::error() << "Trying to set matrix host values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_row_data_ = row_data; - h_col_data_ = col_data; - h_val_data_ = val_data; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) { - out::error() << "Trying to set matrix host data, but the data already 
set!\n"; - out::error() << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_gpu_values_ && d_val_data_) { - out::error() << "Trying to set matrix device values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_row_data_ = row_data; - d_col_data_ = col_data; - d_val_data_ = val_data; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - break; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) + { + out::error() << "Trying to set matrix host data, but the data already set!\n"; + out::error() << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_cpu_values_ && h_val_data_) + { + out::error() << "Trying to set matrix host values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_row_data_ = row_data; + h_col_data_ = col_data; + h_val_data_ = val_data; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) + { + out::error() << "Trying to set matrix host data, but the data already set!\n"; + out::error() << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_gpu_values_ && d_val_data_) + { + out::error() << "Trying to set matrix device values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_row_data_ = row_data; + d_col_data_ = col_data; + d_val_data_ = val_data; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + break; } return 0; } - + /** - * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it + * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it * (will attempt to destroy all three arrays). 
* * @param[in] memspace - memory space (HOST or DEVICE) of incoming data * * @return 0 if successful, -1 if not. * - */ + */ int matrix::Sparse::destroyMatrixData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (owns_cpu_sparsity_pattern_) { - delete [] h_row_data_; - delete [] h_col_data_; - h_row_data_ = nullptr; - h_col_data_ = nullptr; - } - if (owns_cpu_values_) { - delete [] h_val_data_; - h_val_data_ = nullptr; - } - return 0; - case DEVICE: - if (owns_gpu_sparsity_pattern_) { - mem_.deleteOnDevice(d_row_data_); - mem_.deleteOnDevice(d_col_data_); - d_row_data_ = nullptr; - d_col_data_ = nullptr; - } - if (owns_gpu_values_) { - mem_.deleteOnDevice(d_val_data_); - d_val_data_ = nullptr; - } - return 0; - default: - return -1; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_) + { + delete[] h_row_data_; + delete[] h_col_data_; + h_row_data_ = nullptr; + h_col_data_ = nullptr; + } + if (owns_cpu_values_) + { + delete[] h_val_data_; + h_val_data_ = nullptr; + } + return 0; + case DEVICE: + if (owns_gpu_sparsity_pattern_) + { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + d_row_data_ = nullptr; + d_col_data_ = nullptr; + } + if (owns_gpu_values_) + { + mem_.deleteOnDevice(d_val_data_); + d_val_data_ = nullptr; + } + return 0; + default: + return -1; } } /** * @brief updata matrix values using the _new_values_ provided either as HOST or as DEVICE array. - * + * * This function will copy the data (not just assign a pointer) and allocate if needed. * It also sets ownership and update flags. * @@ -342,56 +355,73 @@ namespace ReSolve { * @param[in] memspaceOut - memory space (HOST or DEVICE) of matrix values to be updated. * * @return 0 if successful, -1 if not. 
- */ - int matrix::Sparse::copyValues(const real_type* new_vals, + */ + int matrix::Sparse::copyValues(const real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - + index_type nnz_current = nnz_; - //four cases (for now) + // four cases (for now) setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - if (h_val_data_ == nullptr) { + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) + { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 2://cuda->cpu - mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - 
case 1://cpu->cuda - mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - case 3://cuda->cuda - mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 2: // cuda->cpu + mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->cuda + mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + case 3: // cuda->cuda + mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } @@ -399,7 +429,7 @@ namespace ReSolve { /** * @brief updata matrix values using the _new_values_ provided either as * HOST or as DEVICE array. - * + * * This function only assigns a pointer, but does not copy. It sets update * flags. * @@ -407,39 +437,41 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) of _new_vals_ * * @return 0 if successful, -1 if not. 
- */ - int matrix::Sparse::setValuesPointer(real_type* new_vals, + */ + int matrix::Sparse::setValuesPointer(real_type* new_vals, memory::MemorySpace memspace) { using namespace ReSolve::memory; setNotUpdated(); - switch (memspace) { - case HOST: - if (owns_cpu_values_ && h_val_data_) { - out::error() << "Trying to set matrix host values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_val_data_ = new_vals; - h_data_updated_ = true; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_values_ && d_val_data_) { - out::error() << "Trying to set matrix device values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_val_data_ = new_vals; - d_data_updated_ = true; - owns_gpu_values_ = false; - break; - default: - return -1; + switch (memspace) + { + case HOST: + if (owns_cpu_values_ && h_val_data_) + { + out::error() << "Trying to set matrix host values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_val_data_ = new_vals; + h_data_updated_ = true; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_values_ && d_val_data_) + { + out::error() << "Trying to set matrix device values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_val_data_ = new_vals; + d_data_updated_ = true; + owns_gpu_values_ = false; + break; + default: + return -1; } return 0; } } // namespace ReSolve - diff --git a/resolve/matrix/Sparse.hpp b/resolve/matrix/Sparse.hpp index 5c5144ec7..55d1b50dc 100644 --- a/resolve/matrix/Sparse.hpp +++ b/resolve/matrix/Sparse.hpp @@ -1,77 +1,87 @@ // Matrix utilities -// Mirroring memory approach +// Mirroring memory approach #pragma once -#include #include +#include #include + #include #include -namespace ReSolve { namespace matrix { - - /** - * @brief This 
class implements basic sparse matrix interface. - * - * Most of sparse matrix formats store information about matrix rows and - * columns as integers and nonzero element values as real numbers. - * This class is virtual and implements only what is common for all basic - * formats. Note that regardless of how row/column information is stored, - * all nonzero matrix values need to be stored, so all utilities needed for - * the values are implemented in this class. - * - * @author Kasia Swirydowicz - */ - class Sparse +namespace ReSolve +{ + namespace matrix { + + /** + * @brief This class implements basic sparse matrix interface. + * + * Most of sparse matrix formats store information about matrix rows and + * columns as integers and nonzero element values as real numbers. + * This class is virtual and implements only what is common for all basic + * formats. Note that regardless of how row/column information is stored, + * all nonzero matrix values need to be stored, so all utilities needed for + * the values are implemented in this class. 
+ * + * @author Kasia Swirydowicz + */ + class Sparse + { public: /// Supported sparse matrix formats - enum SparseFormat{NONE, TRIPLET, COMPRESSED_SPARSE_ROW, COMPRESSED_SPARSE_COLUMN}; + enum SparseFormat + { + NONE, + TRIPLET, + COMPRESSED_SPARSE_ROW, + COMPRESSED_SPARSE_COLUMN + }; public: - //basic constructor + // basic constructor Sparse(); Sparse(index_type n, index_type m, index_type nnz); - Sparse(index_type n, - index_type m, + Sparse(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded); + bool symmetric, + bool expanded); virtual ~Sparse(); // accessors - index_type getNumRows(); - index_type getNumColumns(); - index_type getNnz(); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); SparseFormat getSparseFormat() const; - bool symmetric(); + bool symmetric(); bool expanded(); void setSymmetric(bool symmetric); void setExpanded(bool expanded); void setNnz(index_type nnz_new); // for resetting when removing duplicates - int setUpdated(memory::MemorySpace what); + int setUpdated(memory::MemorySpace what); virtual index_type* getRowData(memory::MemorySpace memspace) = 0; virtual index_type* getColData(memory::MemorySpace memspace) = 0; - virtual real_type* getValues( memory::MemorySpace memspace) = 0; + virtual real_type* getValues(memory::MemorySpace memspace) = 0; - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; virtual 
int allocateMatrixData(memory::MemorySpace memspace) = 0; - int setDataPointers(index_type* row_data, - index_type* col_data, - real_type* val_data, - memory::MemorySpace memspace); + int setDataPointers(index_type* row_data, + index_type* col_data, + real_type* val_data, + memory::MemorySpace memspace); int destroyMatrixData(memory::MemorySpace memspace); @@ -79,40 +89,39 @@ namespace ReSolve { namespace matrix { virtual int syncData(memory::MemorySpace memspaceOut) = 0; - - //update Values just updates values; it allocates if necessary. - //values have the same dimensions between different formats - virtual int copyValues(const real_type* new_vals, + // update Values just updates values; it allocates if necessary. + // values have the same dimensions between different formats + virtual int copyValues(const real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - - //set new values just sets the pointer, use caution. - virtual int setValuesPointer(real_type* new_vals, + + // set new values just sets the pointer, use caution. 
+ virtual int setValuesPointer(real_type* new_vals, memory::MemorySpace memspace); - + protected: SparseFormat sparse_format_{NONE}; ///< Matrix format - index_type n_{0}; ///< number of rows - index_type m_{0}; ///< number of columns - index_type nnz_{0}; ///< number of non-zeros + index_type n_{0}; ///< number of rows + index_type m_{0}; ///< number of columns + index_type nnz_{0}; ///< number of non-zeros bool is_symmetric_{false}; ///< symmetry flag - bool is_expanded_{false}; ///< "expanded" flag + bool is_expanded_{false}; ///< "expanded" flag - //host data - index_type* h_row_data_{nullptr}; ///< row data (HOST) - index_type* h_col_data_{nullptr}; ///< column data (HOST) - real_type* h_val_data_{nullptr}; ///< value data (HOST) - bool h_data_updated_{false}; ///< HOST update flag + // host data + index_type* h_row_data_{nullptr}; ///< row data (HOST) + index_type* h_col_data_{nullptr}; ///< column data (HOST) + real_type* h_val_data_{nullptr}; ///< value data (HOST) + bool h_data_updated_{false}; ///< HOST update flag - //gpu data - index_type* d_row_data_{nullptr}; ///< row data (DEVICE) - index_type* d_col_data_{nullptr}; ///< column data (DEVICE) - real_type* d_val_data_{nullptr}; ///< value data (DEVICE) - bool d_data_updated_{false}; ///< DEVICE update flag + // gpu data + index_type* d_row_data_{nullptr}; ///< row data (DEVICE) + index_type* d_col_data_{nullptr}; ///< column data (DEVICE) + real_type* d_val_data_{nullptr}; ///< value data (DEVICE) + bool d_data_updated_{false}; ///< DEVICE update flag void setNotUpdated(); - + // Data ownership flags bool owns_cpu_sparsity_pattern_{false}; ///< for row/col data bool owns_cpu_values_{false}; ///< for nonzero values @@ -121,5 +130,6 @@ namespace ReSolve { namespace matrix { bool owns_gpu_values_{false}; ///< for nonzero values MemoryHandler mem_; ///< Device memory manager object - }; -}} // namespace ReSolve::matrix + }; + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/io.cpp 
b/resolve/matrix/io.cpp index 0e0e886e4..cf241d73e 100644 --- a/resolve/matrix/io.cpp +++ b/resolve/matrix/io.cpp @@ -1,161 +1,166 @@ -#include -#include -#include +#include "io.hpp" + #include -#include +#include +#include #include +#include +#include -#include -#include #include #include -#include "io.hpp" - +#include +#include namespace ReSolve -{ - +{ + /** * @class MatrixElementTriplet - * + * * @brief Helper class for COO matrix sorting. - * + * * Contains triplet of row index, column index and the value of a sparse * matrix element, as well as methods and operator overloads for its * management. - * + * * The entire code is in this file. Its scope is to support matrix file I/O * only. - * + * */ class MatrixElementTriplet { - public: - /// Default constructor initializes all to zero. - MatrixElementTriplet() : rowidx_(0), colidx_(0), value_(0.0) - {} + public: + /// Default constructor initializes all to zero. + MatrixElementTriplet() + : rowidx_(0), colidx_(0), value_(0.0) + { + } - /// Constructor that initializes row and column indices and the element value. - MatrixElementTriplet(index_type i, index_type j, real_type v) : rowidx_(i), colidx_(j), value_(v) - {} + /// Constructor that initializes row and column indices and the element value. + MatrixElementTriplet(index_type i, index_type j, real_type v) + : rowidx_(i), colidx_(j), value_(v) + { + } - ~MatrixElementTriplet() = default; + ~MatrixElementTriplet() = default; - /// Set the row and column indices and the element value. - void set(index_type rowidx, index_type colidx, real_type value) - { - rowidx_ = rowidx; - colidx_ = colidx; - value_ = value; - } + /// Set the row and column indices and the element value. 
+ void set(index_type rowidx, index_type colidx, real_type value) + { + rowidx_ = rowidx; + colidx_ = colidx; + value_ = value; + } - index_type getRowIdx() const - { - return rowidx_; - } - index_type getColIdx() const - { - return colidx_; - } - real_type getValue() const - { - return value_; - } + index_type getRowIdx() const + { + return rowidx_; + } - /** - * @brief Overload of `<` operator - * - * Ensures that matrix elements stored in MatrixElementTriplet will be - * sorted by their indices in a row-major order. - * - */ - bool operator < (const MatrixElementTriplet& t) const - { - if (rowidx_ < t.rowidx_) - return true; + index_type getColIdx() const + { + return colidx_; + } - if ((rowidx_ == t.rowidx_) && (colidx_ < t.colidx_)) - return true; + real_type getValue() const + { + return value_; + } - return false; - } + /** + * @brief Overload of `<` operator + * + * Ensures that matrix elements stored in MatrixElementTriplet will be + * sorted by their indices in a row-major order. + * + */ + bool operator<(const MatrixElementTriplet& t) const + { + if (rowidx_ < t.rowidx_) + return true; - /** - * @brief Overload of `==` operator. - * - * This overload is used to indicate when two different instances of - * MatrixElementTriplet correspond to the same matrix element. - */ - bool operator == (const MatrixElementTriplet& str) const - { - return (rowidx_ == str.rowidx_) && (colidx_ == str.colidx_); - } + if ((rowidx_ == t.rowidx_) && (colidx_ < t.colidx_)) + return true; - /** - * @brief Overload of `+=` operator. - * - * @param t - Triplet to be added in place. - * @return MatrixElementTriplet& - reference to `*this`. - * - * This overload is used to merge duplicates in sparse matrix in COO - * format. It will return error and leave `*this` unchanged if the - * argument corresponds to an element with different row or column - * indices. 
- */ - MatrixElementTriplet& operator += (const MatrixElementTriplet t) + return false; + } + + /** + * @brief Overload of `==` operator. + * + * This overload is used to indicate when two different instances of + * MatrixElementTriplet correspond to the same matrix element. + */ + bool operator==(const MatrixElementTriplet& str) const + { + return (rowidx_ == str.rowidx_) && (colidx_ == str.colidx_); + } + + /** + * @brief Overload of `+=` operator. + * + * @param t - Triplet to be added in place. + * @return MatrixElementTriplet& - reference to `*this`. + * + * This overload is used to merge duplicates in sparse matrix in COO + * format. It will return error and leave `*this` unchanged if the + * argument corresponds to an element with different row or column + * indices. + */ + MatrixElementTriplet& operator+=(const MatrixElementTriplet t) + { + if ((rowidx_ != t.rowidx_) || (colidx_ != t.colidx_)) { - if ((rowidx_ != t.rowidx_) || (colidx_ != t.colidx_)) { - io::Logger::error() << "Adding values into non-matching triplet.\n"; - return *this; - } - value_ += t.value_; + io::Logger::error() << "Adding values into non-matching triplet.\n"; return *this; } + value_ += t.value_; + return *this; + } - /// Utility to print indices (0 index base). - void print() const - { - // Add 1 to indices to restore indexing from MM format - std::cout << rowidx_ << " " << colidx_ << " " << value_ << "\n"; - } + /// Utility to print indices (0 index base). 
+ void print() const + { + // Add 1 to indices to restore indexing from MM format + std::cout << rowidx_ << " " << colidx_ << " " << value_ << "\n"; + } - private: - index_type rowidx_{0}; - index_type colidx_{0}; - real_type value_{0.0}; + private: + index_type rowidx_{0}; + index_type colidx_{0}; + real_type value_{0.0}; }; - namespace io { // Static helper functionsdeclarations - static void createMatrixFromFileAsList(std::istream& file, - bool is_expand_symmetric, + static void createMatrixFromFileAsList(std::istream& file, + bool is_expand_symmetric, std::list& tmp, - index_type& n, - index_type& m, - index_type& nnz, - bool& symmetric, - bool& expanded); - static void createMatrixFromFileAsList(std::istream& file, - matrix::Sparse* A, + index_type& n, + index_type& m, + index_type& nnz, + bool& symmetric, + bool& expanded); + static void createMatrixFromFileAsList(std::istream& file, + matrix::Sparse* A, std::list& tmp); // static void print_list(std::list& l); - static int loadToList(std::istream& file, bool is_expand_symmetric, std::list& tmp); - static int removeDuplicates(std::list& tmp); - static int copyListToCoo(const std::list& tmp, matrix::Coo* A); - static int copyListToCsr(const std::list& tmp, matrix::Csr* A); - + static int loadToList(std::istream& file, bool is_expand_symmetric, std::list& tmp); + static int removeDuplicates(std::list& tmp); + static int copyListToCoo(const std::list& tmp, matrix::Coo* A); + static int copyListToCsr(const std::list& tmp, matrix::Csr* A); /** * @brief Create a COO matrix and populate it with data from Matrix Market * file. - * + * * @param file - input Matrix Market file * @param is_expand_symmetric - whether to expand symmetric matrix to general format * @return matrix::Coo* - pointer to COO matrix - * + * * @pre file is a valid std::istream with Matrix Market data. * @pre input data is in valid Matrix Market format. 
* @post Valid COO matrix sorted in row major order and without duplicates @@ -163,14 +168,16 @@ namespace ReSolve */ matrix::Coo* createCooFromFile(std::istream& file, bool is_expand_symmetric) { - if(!file) { - Logger::error() << "Empty input to createCooFromFile function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to createCooFromFile function ... \n" + << std::endl; return nullptr; } index_type m = 0, n = 0, nnz = 0; - bool symmetric = false; - bool expanded = true; + bool symmetric = false; + bool expanded = true; std::list tmp; @@ -186,12 +193,12 @@ namespace ReSolve } /** - * @brief - * + * @brief + * * @param file - input Matrix Market file * @param is_expand_symmetric - whether to expand symmetric matrix to general format * @return matrix::Csr* - pointer to COO matrix - * + * * @pre file is a valid std::istream with Matrix Market data. * @pre input data is in valid Matrix Market format. * @post Valid CSR matrix sorted in row major order and without duplicates @@ -199,14 +206,16 @@ namespace ReSolve */ matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric) { - if(!file) { - Logger::error() << "Empty input to createCooFromFile function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to createCooFromFile function ... \n" + << std::endl; return nullptr; } index_type m = 0, n = 0, nnz = 0; - bool symmetric = false; - bool expanded = true; + bool symmetric = false; + bool expanded = true; std::list tmp; @@ -223,37 +232,41 @@ namespace ReSolve /** * @brief Imports vector data from a Matrix Market file. - * + * * @param file - std::istream to Matrix Market file (data). * @return real_type* - pointer to array with (dense) vector entries. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @post A raw array with vector data is created. 
- * + * */ real_type* createArrayFromFile(std::istream& file) { - if(!file) { - Logger::error() << "Empty input to " << __func__ << " function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to " << __func__ << " function ... \n" + << std::endl; return nullptr; } std::stringstream ss; - std::string line; - index_type i = 0; - index_type n, m; + std::string line; + index_type i = 0; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; real_type* vec = new real_type[n]; - real_type a; - while (file >> a) { + real_type a; + while (file >> a) + { vec[i] = a; i++; } @@ -262,19 +275,21 @@ namespace ReSolve vector::Vector* createVectorFromFile(std::istream& file) { - if(!file) { + if (!file) + { Logger::error() << "Empty input to " << __func__ << " function ... \n"; return nullptr; } std::stringstream ss; - std::string line; - index_type i = 0; - index_type n, m; + std::string line; + index_type i = 0; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; @@ -282,7 +297,8 @@ namespace ReSolve vector::Vector* vec = new vector::Vector(n); vec->allocate(memory::HOST); real_type a; - while (file >> a) { + while (file >> a) + { vec->getData(memory::HOST)[i] = a; i++; } @@ -292,13 +308,13 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates COO matrix A. - * + * * Compute complexity of this function is O(NNZ*log(NNZ)). There is an * overload of this function that generates a CSR matrix. - * + * * @param file - std::istream to Matrix Market file (data). * @param A - output COO matrix. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. 
* @pre Size of matrix stored in the Matrix Market file matches the size of A. @@ -307,7 +323,8 @@ namespace ReSolve */ void updateMatrixFromFile(std::istream& file, matrix::Coo* A) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to createCooFromFile function ..." << std::endl; return; } @@ -322,13 +339,13 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates CSR matrix A. - * + * * Compute complexity of this function is O(NNZ*log(NNZ)). There is an * overload of this function that generates a COO matrix. - * + * * @param file - std::istream to Matrix Market file (data). * @param A - output CSR matrix. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre Size of matrix stored in the Matrix Market file matches the size of A. @@ -337,7 +354,8 @@ namespace ReSolve */ void updateMatrixFromFile(std::istream& file, matrix::Csr* A) { - if(!file) { + if (!file) + { Logger::error() << "Empty input to updateMatrixFromFile function ..." << std::endl; return; } @@ -352,74 +370,82 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates array p_rhs. - * + * * @param file - std::istream to Matrix Market file (data). * @param p_rhs - pointer to a pointer to a raw array with vector data. - * + * * @todo The righ-hand-side should be of vector type, not a raw array. With * current implementation it is impossible to verify if the sufficient * space is allocated to store all the data from the input file. Risk of * writing past the end of the array is high. */ - void updateArrayFromFile(std::istream& file, real_type** p_rhs) + void updateArrayFromFile(std::istream& file, real_type** p_rhs) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to updateArrayFromFile function ..." 
<< std::endl; return; } - real_type* rhs = *p_rhs; + real_type* rhs = *p_rhs; std::stringstream ss; - std::string line; - index_type n, m; + std::string line; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; - if (rhs == nullptr) { + if (rhs == nullptr) + { rhs = new real_type[n]; - } - real_type a; + } + real_type a; index_type i = 0; - while (file >> a) { + while (file >> a) + { rhs[i] = a; i++; } } - void updateVectorFromFile(std::istream& file, vector::Vector* vec_rhs) + void updateVectorFromFile(std::istream& file, vector::Vector* vec_rhs) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to updateArrayFromFile function ..." << std::endl; return; } std::stringstream ss; - std::string line; - index_type n, m; + std::string line; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); // std::cout<> n >> m; - if (n != vec_rhs->getSize()) { + if (n != vec_rhs->getSize()) + { Logger::error() << "File data does not match the vector size.\n" << "Vector not updated!\n"; return; } real_type* rhs = vec_rhs->getData(memory::HOST); - real_type a = 0.0; - index_type i = 0; - while (file >> a) { + real_type a = 0.0; + index_type i = 0; + while (file >> a) + { rhs[i] = a; // std::cout << i << ": " << a << "\n"; i++; @@ -429,32 +455,36 @@ namespace ReSolve /** * @brief Writes matrix A to a file in Matrix Market format. - * + * * @param A - input matrix. * @param file_out - std::ostream to output file. * @return int - 0 if successful, error code otherwise. - * + * * @pre `A` is a valid sparse matrix. * @post Valid Matrix Marked data is written to std::ostream. * @invariant Matrix `A` elements are unchanged. 
*/ int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out) { - if (A == nullptr) { + if (A == nullptr) + { Logger::error() << "Matrix pointer is NULL!\n"; return -1; } - if (A->symmetric() && !A->expanded()) { + if (A->symmetric() && !A->expanded()) + { file_out << "%%MatrixMarket matrix coordinate real symmetric\n"; - } else { + } + else + { file_out << "%%MatrixMarket matrix coordinate real general\n"; } file_out << "% Generated by Re::Solve \n"; - file_out << A->getNumRows() << " " + file_out << A->getNumRows() << " " << A->getNumColumns() << " " - << A->getNnz() << "\n"; - + << A->getNnz() << "\n"; + index_type indexing_base = 1; A->print(file_out, indexing_base); return 0; @@ -462,11 +492,11 @@ namespace ReSolve /** * @brief Writes vector data to a file in Matrix Market format. - * + * * @param vec_x - Input vector. * @param file_out - std::ostream to output file. * @return int - 0 if successful, error code otherwise. - * + * * @pre `vec_x` is a valid vector. * @post Valid Matrix Market data is written to std::ostream. * @invariant Elements of `vec_x` are unchanged. @@ -478,7 +508,8 @@ namespace ReSolve file_out << "%%MatrixMarket matrix array real general \n"; file_out << "% Generated by Re::Solve \n"; file_out << vec_x->getSize() << " " << 1 << "\n"; - for (int i = 0; i < vec_x->getSize(); ++i) { + for (int i = 0; i < vec_x->getSize(); ++i) + { file_out << std::setprecision(std::numeric_limits::digits10 + 1) << std::scientific << x_data[i] << "\n"; } @@ -486,7 +517,6 @@ namespace ReSolve return 0; } - // // Static helper functions // @@ -494,16 +524,16 @@ namespace ReSolve /** * @brief Reads Matrix Market data from std::istream and stores it into * std::list. - * + * * @param[in] file - std::istream to Matrix Market file (data). 
* @param[in] is_expand_symmetric - whether to expand symmetric matrix to general format * @param[out] tmp - std::list where to store matrix data * @param[out] n - number of rows as read from Matrix Market file - * @param[out] m - number of columns as read from Matrix Market file + * @param[out] m - number of columns as read from Matrix Market file * @param[out] nnz - calculated number of matrix nonzeros * @param[out] symmetric - if matrix is symmetric * @param[out] expanded - if symmetric matrix is expanded to general format - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! @@ -515,29 +545,30 @@ namespace ReSolve * @post `tmp` list is overwritten with matrix elements read from the input * stream. */ - static void createMatrixFromFileAsList(std::istream& file, - bool is_expand_symmetric, + static void createMatrixFromFileAsList(std::istream& file, + bool is_expand_symmetric, std::list& tmp, - index_type& n, - index_type& m, - index_type& nnz, - bool& symmetric, - bool& expanded) + index_type& n, + index_type& m, + index_type& nnz, + bool& symmetric, + bool& expanded) { std::stringstream ss; - std::string line; - m = 0; - n = 0; - nnz = 0; + std::string line; + m = 0; + n = 0; + nnz = 0; symmetric = false; - expanded = true; + expanded = true; // Parse header and check if matrix is symmetric while (std::getline(file, line)) { if (line.at(0) != '%') break; - if (line.find("symmetric") != std::string::npos) { + if (line.find("symmetric") != std::string::npos) + { symmetric = true; expanded = is_expand_symmetric; } @@ -561,12 +592,12 @@ namespace ReSolve } /** - * @brief - * + * @brief + * * @param[in] file - std::istream to Matrix Market file (data). * @param[in] A - sparse matrix to be updated * @param[out] tmp - temporary list with matrix entries - * + * * @pre `file` is a valid std::istream with Matrix Market data. 
* @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! @@ -577,37 +608,41 @@ namespace ReSolve * @post `tmp` list is overwritten with matrix elements read from the input * stream. * @invariant Elements of `A` are unchanged in this function but they are - * expected to be overwritten with values in `tmp` later in the code. + * expected to be overwritten with values in `tmp` later in the code. */ - static void createMatrixFromFileAsList(std::istream& file, - matrix::Sparse* A, + static void createMatrixFromFileAsList(std::istream& file, + matrix::Sparse* A, std::list& tmp) { std::stringstream ss; - std::string line; + std::string line; // Default is a general matrix - bool symmetric = false; + bool symmetric = false; // Default is not to expand symmetric matrix bool is_expand_symmetric = false; // Parse header and check if matrix is symmetric std::getline(file, line); - if (line.find("symmetric") != std::string::npos) { + if (line.find("symmetric") != std::string::npos) + { symmetric = true; } - if (symmetric != A->symmetric()) { + if (symmetric != A->symmetric()) + { Logger::error() << "In function updateMatrixFromFile:" << "Source data does not match the symmetry of destination matrix.\n"; } // If the destination matrix is symmetric and expanded, then expand data. - if (A->symmetric()) { + if (A->symmetric()) + { is_expand_symmetric = A->expanded(); } // Skip the header comments - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } // Read the first line with matrix sizes @@ -616,7 +651,8 @@ namespace ReSolve ss >> n >> m >> nnz; // Make sure input data matches matrix A size - if ((A->getNumRows() != n) || (A->getNumColumns() != m)) { + if ((A->getNumRows() != n) || (A->getNumColumns() != m)) + { Logger::error() << "Wrong matrix size: " << A->getNumRows() << "x" << A->getNumColumns() << ". Cannot update! 
\n "; @@ -631,10 +667,8 @@ namespace ReSolve // Remove duplicates in `tmp` list. Complexity O(NNZ) removeDuplicates(tmp); - } - // Commented out; needed for debugging only. // void print_list(std::list& l) // { @@ -647,35 +681,39 @@ namespace ReSolve /** * @brief Loads data from Matrix Market file to a std::list. - * + * * @param[in] file - std::istream to Matrix Market file (data). * @param[in] is_expand_symmetric - whether to expand symmetric matrix. * @param[out] tmp - temporary list with matrix entries * @return int - 0 if successful, error code otherwise. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! * @post `tmp` list is overwritten with matrix elements read from the input * stream. */ - int loadToList(std::istream& file, - bool is_expand_symmetric, + int loadToList(std::istream& file, + bool is_expand_symmetric, std::list& tmp) { index_type i, j; - real_type v; + real_type v; // If the `tmp` list is not empty, clear it. - if (tmp.size() != 0) { + if (tmp.size() != 0) + { tmp.clear(); } - while (file >> i >> j >> v) { + while (file >> i >> j >> v) + { MatrixElementTriplet triplet(i - 1, j - 1, v); tmp.push_back(std::move(triplet)); - if (is_expand_symmetric) { - if (i != j) { + if (is_expand_symmetric) + { + if (i != j) + { MatrixElementTriplet triplet(j - 1, i - 1, v); tmp.push_back(std::move(triplet)); } @@ -687,10 +725,10 @@ namespace ReSolve /** * @brief Removes duplicates from `tmp` list. - * - * @param[in,out] tmp - List with matrix entries. + * + * @param[in,out] tmp - List with matrix entries. * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements. * @post Duplicates in `tmp` are added in place to the first instance * of that matrix element. 
@@ -702,7 +740,8 @@ namespace ReSolve { std::list::iterator it_tmp = it; it++; - if (*it == *it_tmp) { + if (*it == *it_tmp) + { *it += *it_tmp; tmp.erase(it_tmp); } @@ -713,11 +752,11 @@ namespace ReSolve /** * @brief Writes data from the std::list to COO matrix. - * + * * @param[in] tmp - List with matrix entries * @param[out] A - Output COO matrix * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements sorted in row-major order and * without duplicates. * @pre Number of `tmp` elements is not larger than number of nonzeros @@ -728,18 +767,19 @@ namespace ReSolve { index_type* coo_rows = A->getRowData(memory::HOST); index_type* coo_cols = A->getColData(memory::HOST); - real_type* coo_vals = A->getValues( memory::HOST); + real_type* coo_vals = A->getValues(memory::HOST); index_type nnz = static_cast(tmp.size()); - if (A->getNnz() < nnz) { + if (A->getNnz() < nnz) + { Logger::error() << "Too many NNZs: " << nnz << ". Cannot update! \n "; return 1; } A->setNnz(nnz); - index_type element_counter = 0; - std::list::const_iterator it = tmp.begin(); + index_type element_counter = 0; + std::list::const_iterator it = tmp.begin(); while (it != tmp.end()) { coo_rows[element_counter] = it->getRowIdx(); @@ -755,14 +795,13 @@ namespace ReSolve return 0; } - /** * @brief Writes data from the std::list to CSR matrix. - * + * * @param[in] tmp - List with matrix entries * @param[out] A - Output CSR matrix * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements sorted in row-major order and * without duplicates. 
* @pre Number of `tmp` elements is not larger than number of nonzeros @@ -773,11 +812,12 @@ namespace ReSolve { index_type* csr_rows = A->getRowData(memory::HOST); index_type* csr_cols = A->getColData(memory::HOST); - real_type* csr_vals = A->getValues( memory::HOST); + real_type* csr_vals = A->getValues(memory::HOST); // Set number of nonzeros index_type nnz = static_cast(tmp.size()); - if (A->getNnz() < nnz) { + if (A->getNnz() < nnz) + { Logger::error() << "Too many NNZs: " << nnz << ". Cannot update! \n "; return 1; @@ -785,19 +825,21 @@ namespace ReSolve A->setNnz(nnz); // Set all iterators - index_type column_index_counter = 0; - index_type row_pointer_counter = 0; - std::list::const_iterator it = tmp.begin(); + index_type column_index_counter = 0; + index_type row_pointer_counter = 0; + std::list::const_iterator it = tmp.begin(); // Set first row pointer to zero csr_rows[0] = 0; csr_cols[0] = it->getColIdx(); csr_vals[0] = it->getValue(); - for (index_type i = 1; i < nnz; ++i) { + for (index_type i = 1; i < nnz; ++i) + { std::list::const_iterator it_tmp = it; it++; - if (it->getRowIdx() != it_tmp->getRowIdx()) { + if (it->getRowIdx() != it_tmp->getRowIdx()) + { row_pointer_counter++; csr_rows[row_pointer_counter] = i; } diff --git a/resolve/matrix/io.hpp b/resolve/matrix/io.hpp index de50da95e..961bb385f 100644 --- a/resolve/matrix/io.hpp +++ b/resolve/matrix/io.hpp @@ -1,27 +1,43 @@ +#pragma once + #include -namespace ReSolve { namespace vector { - class Vector; -}} +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } +} // namespace ReSolve -namespace ReSolve { namespace matrix { - class Sparse; - class Coo; - class Csr; -}} +namespace ReSolve +{ + namespace matrix + { + class Sparse; + class Coo; + class Csr; + } // namespace matrix +} // namespace ReSolve -namespace ReSolve { namespace io { - using vector_type = vector::Vector; +namespace ReSolve +{ + namespace io + { + using vector_type = vector::Vector; - matrix::Coo* 
createCooFromFile(std::istream& file, bool is_expand_symmetric = true); - matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric = true); - void updateMatrixFromFile(std::istream& file, matrix::Coo* A); - void updateMatrixFromFile(std::istream& file, matrix::Csr* A); - real_type* createArrayFromFile(std::istream& file); - vector::Vector* createVectorFromFile(std::istream& file); - void updateArrayFromFile(std::istream& file, real_type** rhs); - void updateVectorFromFile(std::istream& file, vector::Vector* rhs); + matrix::Coo* createCooFromFile(std::istream& file, bool is_expand_symmetric = true); + matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric = true); + void updateMatrixFromFile(std::istream& file, matrix::Coo* A); + void updateMatrixFromFile(std::istream& file, matrix::Csr* A); + real_type* createArrayFromFile(std::istream& file); + vector::Vector* createVectorFromFile(std::istream& file); + void updateArrayFromFile(std::istream& file, real_type** rhs); + void updateVectorFromFile(std::istream& file, vector::Vector* rhs); - int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out); - int writeVectorToFile(vector_type* vec_x, std::ostream &file_out); -}} // ReSolve::io + int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out); + int writeVectorToFile(vector_type* vec_x, std::ostream& file_out); + } // namespace io +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCpu.cpp b/resolve/random/RandomSketchingCountCpu.cpp index 9dfc336f7..4471fe1f2 100644 --- a/resolve/random/RandomSketchingCountCpu.cpp +++ b/resolve/random/RandomSketchingCountCpu.cpp @@ -3,17 +3,18 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountCuda class. 
- * + * */ -#include #include -#include -namespace ReSolve +#include +#include + +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountCpu::RandomSketchingCountCpu() @@ -23,23 +24,23 @@ namespace ReSolve /// Destructor RandomSketchingCountCpu::~RandomSketchingCountCpu() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. - * Setup has been run at least once - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * Setup has been run at least once + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountCpu::Theta(vector_type* input, vector_type* output) { @@ -54,39 +55,43 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. - * * @post The arrays needed for performing sketches with CountSketch (_flip_ and _labels_ ) - * are initialized. If GPU is enabled, the arrays will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * are initialized. If GPU is enabled, the arrays will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). 
*/ int RandomSketchingCountCpu::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new index_type[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new index_type[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new index_type[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * + * arrays will be copied to the GPU, as well + * * @return 0 if successful, !=0 otherwise (TODO). - * + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingCountCpu::reset() { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCpu.hpp b/resolve/random/RandomSketchingCountCpu.hpp index e55351b95..7f82d85d6 100644 --- a/resolve/random/RandomSketchingCountCpu.hpp +++ b/resolve/random/RandomSketchingCountCpu.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountCuda class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,34 +21,34 @@ namespace ReSolve { /** * @brief Count sketching implementation for CPU. 
- * + * */ class RandomSketchingCountCpu : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; + private: + using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountCpu(); + public: + // constructor + RandomSketchingCountCpu(); - // destructor - virtual ~RandomSketchingCountCpu(); + // destructor + virtual ~RandomSketchingCountCpu(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - // MemoryHandler mem_; ///< Device memory manager object + // MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCuda.cpp b/resolve/random/RandomSketchingCountCuda.cpp index 31d93d5a9..674f44383 100644 --- a/resolve/random/RandomSketchingCountCuda.cpp +++ b/resolve/random/RandomSketchingCountCuda.cpp @@ -3,18 +3,19 @@ * 
@author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountCuda class. - * + * */ +#include + #include +#include #include -#include -#include -namespace ReSolve +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountCuda::RandomSketchingCountCuda() @@ -24,25 +25,25 @@ namespace ReSolve /// Destructor RandomSketchingCountCuda::~RandomSketchingCountCuda() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; mem_.deleteOnDevice(d_labels_); mem_.deleteOnDevice(d_flip_); } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. - * Setup has been run at least once - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * Setup has been run at least once + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountCuda::Theta(vector_type* input, vector_type* output) { @@ -51,7 +52,7 @@ namespace ReSolve k_rand_, d_labels_, d_flip_, - input->getData(memory::DEVICE), + input->getData(memory::DEVICE), output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; @@ -59,46 +60,50 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. - * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are initialized. 
If GPU is enabled, the arrays - * will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingCountCuda::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new int[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new int[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new int[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * + * arrays will be copied to the GPU, as well + * * @return 0 if successful, !=0 otherwise (TODO). - * + * * @todo Need to be fixed, this can be done on the GPU. 
*/ int RandomSketchingCountCuda::reset() // if needed can be reset (like when Krylov method restarts) { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } @@ -143,4 +152,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCuda.hpp b/resolve/random/RandomSketchingCountCuda.hpp index d81a7a551..e4cbf28b3 100644 --- a/resolve/random/RandomSketchingCountCuda.hpp +++ b/resolve/random/RandomSketchingCountCuda.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountCuda class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,37 +21,37 @@ namespace ReSolve { /** * @brief Count sketch implementation for CUDA device. 
- * + * */ class RandomSketchingCountCuda : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountCuda(); + public: + // constructor + RandomSketchingCountCuda(); - // destructor - virtual ~RandomSketchingCountCuda(); + // destructor + virtual ~RandomSketchingCountCuda(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart - index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart + index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart + index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountHip.cpp 
b/resolve/random/RandomSketchingCountHip.cpp index eb2fd0d0c..db4496177 100644 --- a/resolve/random/RandomSketchingCountHip.cpp +++ b/resolve/random/RandomSketchingCountHip.cpp @@ -3,18 +3,19 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountHip class. - * + * */ +#include + #include +#include #include -#include -#include -namespace ReSolve +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountHip::RandomSketchingCountHip() @@ -24,25 +25,25 @@ namespace ReSolve /// Destructor RandomSketchingCountHip::~RandomSketchingCountHip() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; mem_.deleteOnDevice(d_labels_); mem_.deleteOnDevice(d_flip_); } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. * Setup has been run at least once. - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountHip::Theta(vector_type* input, vector_type* output) { @@ -51,7 +52,7 @@ namespace ReSolve k_rand_, d_labels_, d_flip_, - input->getData(memory::DEVICE), + input->getData(memory::DEVICE), output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; @@ -59,46 +60,50 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. 
- * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are initialized. If GPU is enabled, the arrays - * will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingCountHip::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new int[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new int[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new int[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * arrays will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. 
*/ int RandomSketchingCountHip::reset() // if needed can be reset (like when Krylov method restarts) { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } @@ -143,4 +152,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountHip.hpp b/resolve/random/RandomSketchingCountHip.hpp index ff7ef5c76..c55137a51 100644 --- a/resolve/random/RandomSketchingCountHip.hpp +++ b/resolve/random/RandomSketchingCountHip.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountHip class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,35 +21,36 @@ namespace ReSolve { /** * @brief Count sketch implementation for a HIP device. 
- * + * */ class RandomSketchingCountHip : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountHip(); - // destructor - virtual ~RandomSketchingCountHip(); + private: + using vector_type = vector::Vector; + + public: + // constructor + RandomSketchingCountHip(); + // destructor + virtual ~RandomSketchingCountHip(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart - index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart + index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart + index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git 
a/resolve/random/RandomSketchingFWHTCpu.cpp b/resolve/random/RandomSketchingFWHTCpu.cpp index 14e6dfd64..c610f49ea 100644 --- a/resolve/random/RandomSketchingFWHTCpu.cpp +++ b/resolve/random/RandomSketchingFWHTCpu.cpp @@ -3,25 +3,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTCpu class. - * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * * @post All class variables are set to nullptr. */ RandomSketchingFWHTCpu::RandomSketchingFWHTCpu() @@ -30,122 +31,130 @@ namespace ReSolve /** * @brief destructor - * + * */ RandomSketchingFWHTCpu::~RandomSketchingFWHTCpu() { - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; - delete [] d_aux_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; + delete[] d_aux_; } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector, size _n_ - * @param[out] output - output vector, size _k_ - * + * @param[in] input - input vector, size _n_ + * @param[out] output - output vector, size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). 
*/ int RandomSketchingFWHTCpu::Theta(vector_type* input, vector_type* output) { std::memset(d_aux_, 0.0, static_cast(N_) * sizeof(real_type)); - cpu::FWHT_scaleByD(n_, + cpu::FWHT_scaleByD(n_, h_D_, input->getData(memory::HOST), - d_aux_); + d_aux_); cpu::FWHT(1, log2N_, d_aux_); - cpu::FWHT_select(k_rand_, - h_perm_, - d_aux_, + cpu::FWHT_select(k_rand_, + h_perm_, + d_aux_, output->getData(memory::HOST)); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. It also allocates auxiliary arrays. * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCpu::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); h_seq_ = new index_type[N_]; - h_perm_ = new index_type[k_rand_]; - h_D_ = new index_type[n_]; + h_perm_ = new index_type[k_rand_]; + h_D_ = new index_type[n_]; int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - 
//Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // Allocate auxiliary data array - d_aux_ = new real_type[N_]; + d_aux_ = new real_type[N_]; return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. - * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). - * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. 
*/ int RandomSketchingFWHTCpu::reset() @@ -155,31 +164,38 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCpu.hpp b/resolve/random/RandomSketchingFWHTCpu.hpp index a582a7fb6..82eb0ed5b 100644 --- a/resolve/random/RandomSketchingFWHTCpu.hpp +++ b/resolve/random/RandomSketchingFWHTCpu.hpp @@ -3,54 +3,56 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTCpu class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using CPU backend. 
- * + * */ class RandomSketchingFWHTCpu : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; - public: - RandomSketchingFWHTCpu(); - virtual ~RandomSketchingFWHTCpu(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. 
- - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - // MemoryHandler mem_; ///< Device memory manager object + private: + using vector_type = vector::Vector; + + public: + RandomSketchingFWHTCpu(); + virtual ~RandomSketchingFWHTCpu(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. + + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + // MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCuda.cpp b/resolve/random/RandomSketchingFWHTCuda.cpp index 2544c49df..6ad615407 100644 --- a/resolve/random/RandomSketchingFWHTCuda.cpp +++ b/resolve/random/RandomSketchingFWHTCuda.cpp @@ -3,25 +3,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTCuda class. 
- * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * */ RandomSketchingFWHTCuda::RandomSketchingFWHTCuda() { @@ -29,118 +30,126 @@ namespace ReSolve /** * @brief Destructor - * + * */ RandomSketchingFWHTCuda::~RandomSketchingFWHTCuda() { using namespace memory; - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; mem_.deleteOnDevice(d_D_); mem_.deleteOnDevice(d_perm_); mem_.deleteOnDevice(d_aux_); } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector, size _n_ - * @param[out] output - output vector, size _k_ - * + * @param[in] input - input vector, size _n_ + * @param[out] output - output vector, size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCuda::Theta(vector_type* input, vector_type* output) { mem_.setZeroArrayOnDevice(d_aux_, N_); - cuda::FWHT_scaleByD(n_, + cuda::FWHT_scaleByD(n_, d_D_, - input->getData(memory::DEVICE), - d_aux_); + input->getData(memory::DEVICE), + d_aux_); mem_.deviceSynchronize(); cuda::FWHT(1, log2N_, d_aux_); mem_.deviceSynchronize(); - cuda::FWHT_select(k_rand_, - d_perm_, - d_aux_, - output->getData(memory::DEVICE)); + cuda::FWHT_select(k_rand_, + d_perm_, + d_aux_, + output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. 
It also allocates auxiliary arrays. * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCuda::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); h_seq_ = new int[N_]; - h_perm_ = new int[k_rand_]; - h_D_ = new int[n_]; + h_perm_ = new int[k_rand_]; + h_D_ = new int[n_]; int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // allocate on device - mem_.allocateArrayOnDevice(&d_perm_, k_rand_); - 
mem_.allocateArrayOnDevice(&d_D_, n_); - mem_.allocateArrayOnDevice(&d_aux_, N_); + mem_.allocateArrayOnDevice(&d_perm_, k_rand_); + mem_.allocateArrayOnDevice(&d_D_, n_); + mem_.allocateArrayOnDevice(&d_aux_, N_); // then copy mem_.copyArrayHostToDevice(d_perm_, h_perm_, k_rand_); mem_.copyArrayHostToDevice(d_D_, h_D_, n_); @@ -148,16 +157,16 @@ namespace ReSolve return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. - * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). - * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingFWHTCuda::reset() @@ -167,27 +176,34 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } @@ -198,4 +214,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCuda.hpp b/resolve/random/RandomSketchingFWHTCuda.hpp index 03eee20b0..cbfd6e02a 100644 --- a/resolve/random/RandomSketchingFWHTCuda.hpp +++ b/resolve/random/RandomSketchingFWHTCuda.hpp @@ -3,55 +3,57 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTCuda 
class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using CUDA backend. - * + * */ class RandomSketchingFWHTCuda : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - RandomSketchingFWHTCuda(); - virtual ~RandomSketchingFWHTCuda(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - index_type* d_D_{nullptr}; ///< device mirror of D - index_type* d_perm_{nullptr}; ///< device mirror of h_perm - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. 
- - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - MemoryHandler mem_; ///< Device memory manager object + + public: + RandomSketchingFWHTCuda(); + virtual ~RandomSketchingFWHTCuda(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + index_type* d_D_{nullptr}; ///< device mirror of D + index_type* d_perm_{nullptr}; ///< device mirror of h_perm + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. 
+ + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTHip.cpp b/resolve/random/RandomSketchingFWHTHip.cpp index 20b31915d..e770ddfe0 100644 --- a/resolve/random/RandomSketchingFWHTHip.cpp +++ b/resolve/random/RandomSketchingFWHTHip.cpp @@ -4,25 +4,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTHip class. - * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * */ RandomSketchingFWHTHip::RandomSketchingFWHTHip() { @@ -30,83 +31,84 @@ namespace ReSolve /** * @brief Destructor - * + * * @todo Add boolean flag indicating setup (allocations) are done. - * + * */ RandomSketchingFWHTHip::~RandomSketchingFWHTHip() { using namespace memory; - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; mem_.deleteOnDevice(d_D_); mem_.deleteOnDevice(d_perm_); mem_.deleteOnDevice(d_aux_); } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector of size _n_ - * @param[out] output - output vector of size _k_ - * + * @param[in] input - input vector of size _n_ + * @param[out] output - output vector of size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). 
+ * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTHip::Theta(vector_type* input, vector_type* output) { mem_.setZeroArrayOnDevice(d_aux_, N_); - hip::FWHT_scaleByD(n_, + hip::FWHT_scaleByD(n_, d_D_, - input->getData(memory::DEVICE), - d_aux_); + input->getData(memory::DEVICE), + d_aux_); mem_.deviceSynchronize(); hip::FWHT(1, log2N_, d_aux_); mem_.deviceSynchronize(); - hip::FWHT_select(k_rand_, - d_perm_, - d_aux_, - output->getData(memory::DEVICE)); + hip::FWHT_select(k_rand_, + d_perm_, + d_aux_, + output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. It also allocates auxiliary arrays. * * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 of successful, !=0 otherwise. + * @return 0 of successful, !=0 otherwise. 
*/ int RandomSketchingFWHTHip::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); @@ -117,34 +119,41 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // allocate on device - mem_.allocateArrayOnDevice(&d_perm_, k_rand_); - mem_.allocateArrayOnDevice(&d_D_, n_); - mem_.allocateArrayOnDevice(&d_aux_, N_); + mem_.allocateArrayOnDevice(&d_perm_, k_rand_); + mem_.allocateArrayOnDevice(&d_D_, n_); + mem_.allocateArrayOnDevice(&d_aux_, N_); // then copy mem_.copyArrayHostToDevice(d_perm_, h_perm_, k_rand_); mem_.copyArrayHostToDevice(d_D_, h_D_, n_); @@ -152,16 +161,16 @@ namespace ReSolve return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. 
- * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * * @return 0 of successful, -1 otherwise. - * + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingFWHTHip::reset() @@ -171,27 +180,34 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } @@ -202,4 +218,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTHip.hpp b/resolve/random/RandomSketchingFWHTHip.hpp index 746d88711..0892a2cb7 100644 --- a/resolve/random/RandomSketchingFWHTHip.hpp +++ b/resolve/random/RandomSketchingFWHTHip.hpp @@ -3,58 +3,60 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTHip class. - * + * * @copyright Copyright (c) 2024 - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using HIP backend. 
- * + * */ class RandomSketchingFWHTHip : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - RandomSketchingFWHTHip(); - virtual ~RandomSketchingFWHTHip(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - index_type* d_D_{nullptr}; ///< device mirror of D - index_type* d_perm_{nullptr}; ///< device mirror of h_perm - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. - - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - MemoryHandler mem_; ///< Device memory manager object + + public: + RandomSketchingFWHTHip(); + virtual ~RandomSketchingFWHTHip(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. 
D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + index_type* d_D_{nullptr}; ///< device mirror of D + index_type* d_perm_{nullptr}; ///< device mirror of h_perm + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. + + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingImpl.hpp b/resolve/random/RandomSketchingImpl.hpp index 84bb80246..df8187c41 100644 --- a/resolve/random/RandomSketchingImpl.hpp +++ b/resolve/random/RandomSketchingImpl.hpp @@ -2,45 +2,44 @@ * @file RandomSketchingImpl.hpp * @author Slaven Peles (peless@ornl.gov) * @brief Pure virtual RandomSketchingImpl class. - * + * */ #pragma once #include - namespace ReSolve { namespace vector { class Vector; } -} +} // namespace ReSolve namespace ReSolve { - /** + /** * @brief Interface to random sketching implementations. - * + * * All sketching methods inherit from this class. 
*/ class RandomSketchingImpl { - public: - RandomSketchingImpl() - { - } + public: + RandomSketchingImpl() + { + } - virtual ~RandomSketchingImpl() - { - } + virtual ~RandomSketchingImpl() + { + } - // Actual sketching process - virtual int Theta(vector::Vector* input, vector::Vector* output) = 0; + // Actual sketching process + virtual int Theta(vector::Vector* input, vector::Vector* output) = 0; - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k) = 0; + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k) = 0; - // Needed for iterative methods with restarting - virtual int reset() = 0; + // Needed for iterative methods with restarting + virtual int reset() = 0; }; } // namespace ReSolve diff --git a/resolve/random/SketchingHandler.cpp b/resolve/random/SketchingHandler.cpp index e4b1008c1..b4ed40595 100644 --- a/resolve/random/SketchingHandler.cpp +++ b/resolve/random/SketchingHandler.cpp @@ -2,79 +2,86 @@ * @file SketchingHandler.cpp * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of the SketchingHandler class - * + * */ -#include +#include "SketchingHandler.hpp" + +#include #include #include -#include +#include #include #include -#include -#include "SketchingHandler.hpp" +#include + +namespace ReSolve +{ -namespace ReSolve { - /** * @brief Constructor creates requested sketching method. - * + * * Create instance of the specified sketching method on the selected device. 
* - */ + */ SketchingHandler::SketchingHandler(SketchingMethod method, memory::DeviceType devtype) { - if (devtype == memory::NONE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountCpu(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTCpu(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::NONE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountCpu(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTCpu(); + break; + default: + sketching_ = nullptr; + break; } } #ifdef RESOLVE_USE_CUDA - if (devtype == memory::CUDADEVICE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountCuda(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTCuda(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::CUDADEVICE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountCuda(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTCuda(); + break; + default: + sketching_ = nullptr; + break; } } #endif #ifdef RESOLVE_USE_HIP - if (devtype == memory::HIPDEVICE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountHip(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTHip(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::HIPDEVICE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountHip(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTHip(); + break; + default: + sketching_ = nullptr; + break; } } #endif - } /** * @brief Destructor deletes the 
sketching method implementation. * - */ + */ SketchingHandler::~SketchingHandler() { delete sketching_; @@ -98,4 +105,4 @@ namespace ReSolve { return sketching_->reset(); } -} +} // namespace ReSolve diff --git a/resolve/random/SketchingHandler.hpp b/resolve/random/SketchingHandler.hpp index 3f23fdb79..d3cee1b78 100644 --- a/resolve/random/SketchingHandler.hpp +++ b/resolve/random/SketchingHandler.hpp @@ -2,7 +2,7 @@ * @file SketchingHandler.hpp * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of SketchingHandler class - * + * */ #pragma once #include @@ -11,6 +11,7 @@ namespace ReSolve { // Forward declarations class RandomSketchingImpl; + namespace vector { class VectorHandler; @@ -18,28 +19,29 @@ namespace ReSolve /** * @brief Class that invokes sketching method using PIMPL idiom. - * + * */ class SketchingHandler { - private: - using SketchingMethod = LinSolverIterativeRandFGMRES::SketchingMethod; - using vector_type = vector::Vector; - public: - SketchingHandler(SketchingMethod method, memory::DeviceType devtype); - ~SketchingHandler(); + private: + using SketchingMethod = LinSolverIterativeRandFGMRES::SketchingMethod; + using vector_type = vector::Vector; + + public: + SketchingHandler(SketchingMethod method, memory::DeviceType devtype); + ~SketchingHandler(); - /// Actual sketching process - int Theta(vector_type* input, vector_type* output); + /// Actual sketching process + int Theta(vector_type* input, vector_type* output); - /// Setup the parameters, sampling matrices, permuations, etc. - int setup(index_type n, index_type k); + /// Setup the parameters, sampling matrices, permuations, etc. 
+ int setup(index_type n, index_type k); - /// Needed for iterative methods with restarting - int reset(); + /// Needed for iterative methods with restarting + int reset(); - private: - RandomSketchingImpl* sketching_{nullptr}; ///< Pointer to implementation + private: + RandomSketchingImpl* sketching_{nullptr}; ///< Pointer to implementation }; -} // namespace ReSolve \ No newline at end of file +} // namespace ReSolve diff --git a/resolve/random/cpuSketchingKernels.cpp b/resolve/random/cpuSketchingKernels.cpp index ace5b907e..515036f9c 100644 --- a/resolve/random/cpuSketchingKernels.cpp +++ b/resolve/random/cpuSketchingKernels.cpp @@ -2,11 +2,12 @@ * @file cpuSketchingKernels.cpp * @author your name (you@domain.com) * @brief CPU implementation of random sketching kernels. - * + * */ +#include "cpuSketchingKernels.h" + #include #include -#include "cpuSketchingKernels.h" namespace ReSolve { @@ -14,115 +15,124 @@ namespace ReSolve { /** * @brief Count sketch theta function. - * + * * @param[in] n - input vector size * @param[in] k - output vector size * @param[in] labels - vector of non-negative ints from 0 to k-1, length n * @param[in] flip - vector of 1s and -1s, length n * @param[in] input - vector of lengths n * @param[out] output - vector of length k - * + * * @todo Decide how to allow user to configure grid and block sizes. */ void count_sketch_theta(index_type n, index_type /* k */, index_type* labels, index_type* flip, - real_type* input, - real_type* output) + real_type* input, + real_type* output) { real_type val; - for (index_type i = 0; i < n; ++i) { - val = input[i]; - if (flip[i] != 1) { + for (index_type i = 0; i < n; ++i) + { + val = input[i]; + if (flip[i] != 1) + { val *= -1.0; - } + } output[labels[i]] += val; } } /** * @brief y = D*x - * + * * Multiply array x by diagonal matrix D and store result in array y. - * + * * @param[in] n - size of arrays x, y and matrix D. * @param[in] D - diagonal matrix (stored as integer array). 
* @param[in] x - input array x * @param[out] y - output array y - * + * * @pre Arrays x, y, and D are allocated to size n. * @pre Arrays x and D are initialized. - * + * * @post Array y is overwritten with D*x. */ - void FWHT_scaleByD(index_type n, + void FWHT_scaleByD(index_type n, const index_type* D, - const real_type* x, - real_type* y) + const real_type* x, + real_type* y) { - for (index_type i = 0; i < n; ++i) { - if (D[i] == 1) { + for (index_type i = 0; i < n; ++i) + { + if (D[i] == 1) + { y[i] = x[i]; - } else { + } + else + { y[i] = (-1.0) * x[i]; } - } + } } - + /** * @brief Permute _input_ using _perm_ and store in _output_. - * + * * @param[in] k - size of input and output arrays * @param[in] perm - permutation matrix (stored as an integer array) * @param[in] input - input array * @param[out] output - output array - * + * * @pre Arrays input, output, and perm are allocated to size k. * @pre Arrays input and perm are initialized. - * + * * @post Array output is overwritten with permuted values of input. 
*/ - void FWHT_select(index_type k, + void FWHT_select(index_type k, const index_type* perm, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { - for (index_type i = 0; i < k; ++i) { + for (index_type i = 0; i < k; ++i) + { output[i] = input[perm[i]]; - } + } } /** - * @brief - * + * @brief + * * @param[in] M - Placeholder for GPU grid size (not used here) - * @param[in] log2N - - * @param[out] h_Data - + * @param[in] log2N - + * @param[out] h_Data - */ - void FWHT(index_type /* M */, - index_type log2N, - real_type* h_Data) + void FWHT(index_type /* M */, + index_type log2N, + real_type* h_Data) { index_type h = 1; - index_type N = static_cast(std::pow(2.0,log2N)); - real_type x, y; - - while (h < N) { - for (index_type i = 0; i < N; i += 2*h) { - for (index_type j = i; j < i + h; ++j) { - x = h_Data[j]; - y = h_Data[j + h]; - h_Data[j] = x + y; + index_type N = static_cast(std::pow(2.0, log2N)); + real_type x, y; + + while (h < N) + { + for (index_type i = 0; i < N; i += 2 * h) + { + for (index_type j = i; j < i + h; ++j) + { + x = h_Data[j]; + y = h_Data[j + h]; + h_Data[j] = x + y; h_Data[j + h] = x - y; - } + } } - // note: in "normal" FWHT there is also a division by sqrt(2) here + // note: in "normal" FWHT there is also a division by sqrt(2) here h *= 2; - } + } } } // namespace cpu } // namespace ReSolve - diff --git a/resolve/random/cpuSketchingKernels.h b/resolve/random/cpuSketchingKernels.h index 84d8e3ef6..6195118b4 100644 --- a/resolve/random/cpuSketchingKernels.h +++ b/resolve/random/cpuSketchingKernels.h @@ -2,7 +2,7 @@ * @file cpuSketchingKernels.h * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Function prototypes for random sketching kernels. 
- * + * */ #include @@ -10,23 +10,22 @@ namespace ReSolve { namespace cpu { - void count_sketch_theta(index_type n, - index_type k, + void count_sketch_theta(index_type n, + index_type k, index_type* labels, index_type* flip, - real_type* input, - real_type* output); + real_type* input, + real_type* output); - void FWHT_scaleByD(index_type n, - const index_type* D, - const real_type* x, - real_type* y); + void FWHT_scaleByD(index_type n, + const index_type* D, + const real_type* x, + real_type* y); - void FWHT_select(index_type k, - const index_type* perm, - const real_type* input, - real_type* output); + void FWHT_select(index_type k, + const index_type* perm, + const real_type* input, + real_type* output); void FWHT(index_type M, index_type log2N, real_type* d_Data); - } -} - + } // namespace cpu +} // namespace ReSolve diff --git a/resolve/resolve_defs.hpp.in b/resolve/resolve_defs.hpp.in index e2a6bb46b..f150ed46b 100644 --- a/resolve/resolve_defs.hpp.in +++ b/resolve/resolve_defs.hpp.in @@ -27,4 +27,4 @@ #endif -#endif // __RESOLVE_DEFINITIONS_HPP__ \ No newline at end of file +#endif // __RESOLVE_DEFINITIONS_HPP__ diff --git a/resolve/utilities/logger/Logger.cpp b/resolve/utilities/logger/Logger.cpp index f5227d951..5523d5b7e 100644 --- a/resolve/utilities/logger/Logger.cpp +++ b/resolve/utilities/logger/Logger.cpp @@ -4,9 +4,9 @@ * @author Slaven Peles */ +#include "Logger.hpp" #include -#include "Logger.hpp" namespace ReSolve { @@ -32,8 +32,8 @@ namespace ReSolve /** * @brief Sets verbosity level - * - * @pre `output_streams_` vector is allocated + * + * @pre `output_streams_` vector is allocated * @post Verbosity level is set to user supplied value `v` and outputs * for `output_streams_` are set accordingly. */ @@ -51,13 +51,13 @@ namespace ReSolve /** * @brief Private method to update verbosity. - * + * * This function directs each output stream <= `verbosity_` to user * selected output and sets all others to null device. 
Each output stream * corresponds to different verbosity level. - * + * * @param[in] output_streams - vector of pointers to output streams - * + * * @pre Vector `output_streams` is allocated and correctly initialized. * @post All streams `output_stream_[i]`, where `i <= verbosity_` are * directed to stream `logger_`. The rest are sent to null device @@ -83,9 +83,9 @@ namespace ReSolve /** * @brief Returns reference to output stream for error messages. - * + * * @return Reference to error messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::error() @@ -97,9 +97,9 @@ namespace ReSolve /** * @brief Returns reference to output stream for warning messages. - * + * * @return Reference to warning messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::warning() @@ -111,9 +111,9 @@ namespace ReSolve /** * @brief Returns reference to analysis summary messages output stream. - * + * * @return Reference to analysis summary messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::summary() @@ -124,10 +124,10 @@ namespace ReSolve /** * @brief Returns reference to output stream for all other messages. - * + * * @return Reference to output stream to miscellaneous messages * in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::misc() @@ -139,9 +139,9 @@ namespace ReSolve /** * @brief Open file `filename` and update outputs for different verbosities * streams. - * + * * @param[in] filename - The name of the output file. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. * @post All active streams are directed to user supplied file `filename`. 
*/ @@ -154,12 +154,12 @@ namespace ReSolve /** * @brief Set outputs of active streams to user provided `std::ostream` object. - * + * * All active outputs are redirected to `out` stream. All inactive ones are * directed to null device. - * + * * @param[in] out - User provided output stream. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. * @post All active streams (`output_streams_[i]` where `i <= verbosity_`) * are set to user provided `out` output stream. @@ -172,7 +172,7 @@ namespace ReSolve /** * @brief Close output file. - * + * * @pre Output file `file_` has been opened. * @post Output file `file_` is closed and active output streams are * set to default output `std::cout`. diff --git a/resolve/utilities/logger/Logger.hpp b/resolve/utilities/logger/Logger.hpp index 8b3edbdfd..065f8f4b1 100644 --- a/resolve/utilities/logger/Logger.hpp +++ b/resolve/utilities/logger/Logger.hpp @@ -1,11 +1,11 @@ /** - * @file -*/ + * @file + */ #pragma once -#include #include +#include #include namespace ReSolve @@ -14,43 +14,50 @@ namespace ReSolve { /** * @brief Class that manages and logs outputs from Re::Solve code. - * + * * All methods and data in this class are static. - * + * */ class Logger { - public: - /// Enum specifying verbosity level for the output. - enum Verbosity {NONE=0, ERRORS, WARNINGS, SUMMARY, EVERYTHING}; - - // All methods and data are static so delete constructor and destructor. 
- Logger() = delete; - ~Logger() = delete; - - static std::ostream& error(); - static std::ostream& warning(); - static std::ostream& summary(); - static std::ostream& misc(); - - static void setOutput(std::ostream& out); - static void openOutputFile(std::string filename); - static void closeOutputFile(); - static void setVerbosity(Verbosity v); - static Verbosity verbosity(); - - static std::vector& init(); - - private: - static void updateVerbosity(std::vector& output_streams); - - private: - static std::ostream nullstream_; - static std::ofstream file_; - static std::ostream* logger_; - static std::vector output_streams_; - static std::vector tmp_; - static Verbosity verbosity_; + public: + /// Enum specifying verbosity level for the output. + enum Verbosity + { + NONE = 0, + ERRORS, + WARNINGS, + SUMMARY, + EVERYTHING + }; + + // All methods and data are static so delete constructor and destructor. + Logger() = delete; + ~Logger() = delete; + + static std::ostream& error(); + static std::ostream& warning(); + static std::ostream& summary(); + static std::ostream& misc(); + + static void setOutput(std::ostream& out); + static void openOutputFile(std::string filename); + static void closeOutputFile(); + static void setVerbosity(Verbosity v); + static Verbosity verbosity(); + + static std::vector& init(); + + private: + static void updateVerbosity(std::vector& output_streams); + + private: + static std::ostream nullstream_; + static std::ofstream file_; + static std::ostream* logger_; + static std::vector output_streams_; + static std::vector tmp_; + static Verbosity verbosity_; }; } // namespace io -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/utilities/params/CliOptions.cpp b/resolve/utilities/params/CliOptions.cpp index b65c2492b..7d3ad6084 100644 --- a/resolve/utilities/params/CliOptions.cpp +++ b/resolve/utilities/params/CliOptions.cpp @@ -1,7 +1,7 @@ -#include - #include "CliOptions.hpp" +#include + namespace ReSolve { @@ -29,9 +29,10 
@@ namespace ReSolve std::unique_ptr CliOptions::getParamFromKey(const std::string& key) const { - const OptionsList::const_iterator i = options_.find(key); - auto opt = std::unique_ptr(nullptr); - if (i != options_.end()) { + const OptionsList::const_iterator i = options_.find(key); + auto opt = std::unique_ptr(nullptr); + if (i != options_.end()) + { opt = std::unique_ptr(new CliOptions::Option((*i).first, (*i).second)); } return opt; @@ -40,13 +41,15 @@ namespace ReSolve void CliOptions::printOptionsList() const { OptionsList::const_iterator m = options_.begin(); - int i = 0; - if (options_.empty()) { - std::cout << "No parameters\n"; + int i = 0; + if (options_.empty()) + { + std::cout << "No parameters\n"; } - for (; m != options_.end(); m++, ++i) { + for (; m != options_.end(); m++, ++i) + { std::cout << "Parameter [" << i << "] [" - << (*m).first << " " + << (*m).first << " " << (*m).second << "]\n"; } } @@ -87,7 +90,7 @@ namespace ReSolve // Set option without parameter value options_.insert(Option(option->first, option->second)); // Set parameter ID for the next option and continue - option->first = p; + option->first = p; option->second = ""; if (i == this->last()) { @@ -103,27 +106,26 @@ namespace ReSolve // Set option with parameter value options_.insert(Option(option->first, option->second)); // Reset 'option' pair to receive the next entry and continue - option->first = ""; + option->first = ""; option->second = ""; continue; } } } - const char* const *CliOptions::begin() const + const char* const* CliOptions::begin() const { - return argv_; + return argv_; } - const char* const *CliOptions::end() const + const char* const* CliOptions::end() const { - return argv_ + argc_; + return argv_ + argc_; } - const char* const *CliOptions::last() const + const char* const* CliOptions::last() const { - return argv_ + argc_ - 1; + return argv_ + argc_ - 1; } - } // namespace ReSolve diff --git a/resolve/utilities/params/CliOptions.hpp 
b/resolve/utilities/params/CliOptions.hpp index 78622bf91..5514e324d 100644 --- a/resolve/utilities/params/CliOptions.hpp +++ b/resolve/utilities/params/CliOptions.hpp @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include namespace ReSolve { @@ -14,24 +14,25 @@ namespace ReSolve */ class CliOptions { - public: - using Option = std::pair; - CliOptions(int argc, char* argv[]); - virtual ~CliOptions(); - std::string getAppName() const; - bool hasKey(const std::string&) const; - std::unique_ptr