From 3ef64c3f00fffe618fa2390b2cc33043b969462a Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Thu, 10 Oct 2024 18:57:10 +0800 Subject: [PATCH 01/13] Separate blas kernels' declaration and definition --- source/module_base/CMakeLists.txt | 2 + source/module_base/blas_connector.cpp | 265 ++++++++++++++++++++++++++ source/module_base/blas_connector.h | 246 +++++------------------- 3 files changed, 312 insertions(+), 201 deletions(-) create mode 100644 source/module_base/blas_connector.cpp diff --git a/source/module_base/CMakeLists.txt b/source/module_base/CMakeLists.txt index dde98f915f..02e219fdf4 100644 --- a/source/module_base/CMakeLists.txt +++ b/source/module_base/CMakeLists.txt @@ -60,6 +60,8 @@ add_library( ${LIBM_SRC} ) +add_library(blas_connector.cpp) + add_subdirectory(module_container) if(ENABLE_COVERAGE) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp new file mode 100644 index 0000000000..6a546bf999 --- /dev/null +++ b/source/module_base/blas_connector.cpp @@ -0,0 +1,265 @@ +#include "module_base/blas_connector.h" + +extern "C" +{ + // level 1: std::vector-std::vector operations, O(n) data and O(n) work. 
+ + // Peize Lin add ?scal 2016-08-04, to compute x=a*x + void sscal_(const int *N, const float *alpha, float *X, const int *incX); + void dscal_(const int *N, const double *alpha, double *X, const int *incX); + void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + + // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y + void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); + void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); + void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + + void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); + void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); + + //reason for passing results as argument instead of returning it: + //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ + // void zdotc_(std::complex *result, const int *n, const std::complex *zx, + // const int *incx, const std::complex *zy, const int *incy); + // Peize Lin add ?dot 2017-10-27, to compute d=x*y + float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); + double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); + + // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } + float snrm2_( const int *n, const float *X, const int *incX ); + double dnrm2_( const int *n, const double *X, const int *incX ); + double dznrm2_( const int *n, const std::complex *X, const int *incX ); + + // level 2: matrix-std::vector operations, 
O(n^2) data and O(n^2) work. + void dgemv_(const char*const transa, const int*const m, const int*const n, + const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, + const double*const beta, double*const y, const int*const incy); + + void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void dsymv_(const char *uplo, const int *n, + const double *alpha, const double *a, const int *lda, + const double *x, const int *incx, + const double *beta, double *y, const int *incy); + + // A := alpha x * y.T + A + void dger_(const int* m, + const int* n, + const double* alpha, + const double* x, + const int* incx, + const double* y, + const int* incy, + double* a, + const int* lda); + void zgerc_(const int* m, + const int* n, + const std::complex* alpha, + const std::complex* x, + const int* incx, + const std::complex* y, + const int* incy, + std::complex* a, + const int* lda); + + // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. + + // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? 
+ b * C + // A is general + void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, + const float *beta, float *c, const int *ldc); + void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + + //a is symmetric + void dsymm_(const char *side, const char *uplo, const int *m, const int *n, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + //a is hermitian + void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, + std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); + + //solving triangular matrix with multiple right hand sides + void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + double* alpha, double* a, int *lda, double*b, int *ldb); + void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); + +} + +inline +void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) +{ + saxpy_(&n, &alpha, X, &incX, Y, &incY); +} + 
+inline +void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY) +{ + daxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +inline +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) +{ + caxpy_(&n, &alpha, X, &incX, Y, &incY); +} + +inline +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) +{ + zaxpy_(&n, &alpha, X, &incX, Y, &incY); +} + + +// x=a*x +inline +void BlasConnector::scal( const int n, const float alpha, float *X, const int incX) +{ + sscal_(&n, &alpha, X, &incX); +} + +inline +void BlasConnector::scal( const int n, const double alpha, double *X, const int incX) +{ + dscal_(&n, &alpha, X, &incX); +} + +inline +void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) +{ + cscal_(&n, &alpha, X, &incX); +} + +inline +void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) +{ + zscal_(&n, &alpha, X, &incX); +} + + +// d=x*y +inline +float BlasConnector::dot( const int n, const float *X, const int incX, const float *Y, const int incY) +{ + return sdot_(&n, X, &incX, Y, &incY); +} +inline +double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY) +{ + return ddot_(&n, X, &incX, Y, &incY); +} + +// C = a * A.? * B.? 
+ b * C +inline +void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, + const float alpha, const float *a, const int lda, const float *b, const int ldb, + const float beta, float *c, const int ldc) +{ + sgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); +} + +inline +void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, + const double alpha, const double *a, const int lda, const double *b, const int ldb, + const double beta, double *c, const int ldc) +{ + dgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); +} + +inline +void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, + const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, + const std::complex beta, std::complex *c, const int ldc) +{ + cgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); +} + +inline +void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, + const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, + const std::complex beta, std::complex *c, const int ldc) +{ + zgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); +} + +inline +void BlasConnector::gemv(const char trans, const int m, const int n, + const double alpha, const double* A, const int lda, const double* X, const int incx, + const double beta, double* Y, const int incy) +{ + dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); +} + +inline +void BlasConnector::gemv(const char trans, const int m, const int n, + const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, + const std::complex beta, std::complex *Y, const int incy) +{ + cgemv_(&trans, &m, &n, &alpha, A, &lda, X, 
&incx, &beta, Y, &incy); +} + +inline +void BlasConnector::gemv(const char trans, const int m, const int n, + const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, + const std::complex beta, std::complex *Y, const int incy) +{ + zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); +} + + +// out = ||x||_2 +inline +float BlasConnector::nrm2( const int n, const float *X, const int incX ) +{ + return snrm2_( &n, X, &incX ); +} + +inline +double BlasConnector::nrm2( const int n, const double *X, const int incX ) +{ + return dnrm2_( &n, X, &incX ); +} + +inline +double BlasConnector::nrm2( const int n, const std::complex *X, const int incX ) +{ + return dznrm2_( &n, X, &incX ); +} + +// copies a into b +inline +void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy) +{ + dcopy_(&n, a, &incx, b, &incy); +} + +inline +void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy) +{ + zcopy_(&n, a, &incx, b, &incy); +} \ No newline at end of file diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index 90f1efae53..63ce85414f 100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -3,109 +3,6 @@ #include -extern "C" -{ - // level 1: std::vector-std::vector operations, O(n) data and O(n) work. 
- - // Peize Lin add ?scal 2016-08-04, to compute x=a*x - void sscal_(const int *N, const float *alpha, float *X, const int *incX); - void dscal_(const int *N, const double *alpha, double *X, const int *incX); - void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - - // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y - void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); - void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); - void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - - void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); - void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); - - //reason for passing results as argument instead of returning it: - //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ - // void zdotc_(std::complex *result, const int *n, const std::complex *zx, - // const int *incx, const std::complex *zy, const int *incy); - // Peize Lin add ?dot 2017-10-27, to compute d=x*y - float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); - double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); - - // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } - float snrm2_( const int *n, const float *X, const int *incX ); - double dnrm2_( const int *n, const double *X, const int *incX ); - double dznrm2_( const int *n, const std::complex *X, const int *incX ); - - // level 2: matrix-std::vector operations, 
O(n^2) data and O(n^2) work. - void dgemv_(const char*const transa, const int*const m, const int*const n, - const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, - const double*const beta, double*const y, const int*const incy); - - void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - - // A := alpha x * y.T + A - void dger_(const int* m, - const int* n, - const double* alpha, - const double* x, - const int* incx, - const double* y, - const int* incy, - double* a, - const int* lda); - void zgerc_(const int* m, - const int* n, - const std::complex* alpha, - const std::complex* x, - const int* incx, - const std::complex* y, - const int* incy, - std::complex* a, - const int* lda); - - // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. - - // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? 
+ b * C - // A is general - void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); - void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - //a is symmetric - void dsymm_(const char *side, const char *uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - //a is hermitian - void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); - - //solving triangular matrix with multiple right hand sides - void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - double* alpha, double* a, int *lda, double*b, int *ldb); - void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); - -} - // Class BlasConnector provide the connector to fortran lapack routine. // The entire function in this class are static and inline function. 
// Usage example: BlasConnector::functionname(parameter list). @@ -116,151 +13,98 @@ class BlasConnector // Peize Lin add 2016-08-04 // y=a*x+y static inline - void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) - { - saxpy_(&n, &alpha, X, &incX, Y, &incY); - } + void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY); + static inline - void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY) - { - daxpy_(&n, &alpha, X, &incX, Y, &incY); - } + void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); + static inline - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) - { - caxpy_(&n, &alpha, X, &incX, Y, &incY); - } + void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); + static inline - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) - { - zaxpy_(&n, &alpha, X, &incX, Y, &incY); - } + void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); + // Peize Lin add 2016-08-04 // x=a*x static inline - void scal( const int n, const float alpha, float *X, const int incX) - { - sscal_(&n, &alpha, X, &incX); - } + void scal( const int n, const float alpha, float *X, const int incX); + static inline - void scal( const int n, const double alpha, double *X, const int incX) - { - dscal_(&n, &alpha, X, &incX); - } + void scal( const int n, const double alpha, double *X, const int incX); + static inline - void scal( const int n, const std::complex alpha, std::complex *X, const int incX) - { - cscal_(&n, &alpha, X, &incX); - } + void scal( const int n, const std::complex alpha, std::complex *X, const int incX); + static 
inline - void scal( const int n, const std::complex alpha, std::complex *X, const int incX) - { - zscal_(&n, &alpha, X, &incX); - } + void scal( const int n, const std::complex alpha, std::complex *X, const int incX); + // Peize Lin add 2017-10-27 // d=x*y static inline - float dot( const int n, const float *X, const int incX, const float *Y, const int incY) - { - return sdot_(&n, X, &incX, Y, &incY); - } + float dot( const int n, const float *X, const int incX, const float *Y, const int incY); + static inline - double dot( const int n, const double *X, const int incX, const double *Y, const int incY) - { - return ddot_(&n, X, &incX, Y, &incY); - } + double dot( const int n, const double *X, const int incX, const double *Y, const int incY); + // Peize Lin add 2017-10-27, fix bug trans 2019-01-17 // C = a * A.? * B.? + b * C static inline void gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc) - { - sgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } + const float beta, float *c, const int ldc); + static inline void gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc) - { - dgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } + const double beta, double *c, const int ldc); + static inline void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc) - { - cgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } + const std::complex beta, std::complex *c, const int ldc); + 
static inline void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc) - { - zgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } + const std::complex beta, std::complex *c, const int ldc); + static inline void gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy) - { - dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + const double beta, double* Y, const int incy); + static inline - void gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy) - { - cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + void gemv(const char trans, const int m, const int n, + const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, + const std::complex beta, std::complex *Y, const int incy); + static inline void gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy) - { - zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + const std::complex beta, std::complex *Y, const int incy); + // Peize Lin add 2018-06-12 // out = ||x||_2 static inline - float nrm2( const int n, const float *X, const int incX ) - { - return snrm2_( &n, X, &incX ); - } + float nrm2( const int n, const float *X, const int incX ); + static inline - double nrm2( const int n, const double *X, const int incX ) - { - return dnrm2_( &n, X, &incX ); - } + 
double nrm2( const int n, const double *X, const int incX ); + static inline - double nrm2( const int n, const std::complex *X, const int incX ) - { - return dznrm2_( &n, X, &incX ); - } + double nrm2( const int n, const std::complex *X, const int incX ); + // copies a into b static inline - void copy(const long n, const double *a, const int incx, double *b, const int incy) - { - dcopy_(&n, a, &incx, b, &incy); - } + void copy(const long n, const double *a, const int incx, double *b, const int incy); + static inline - void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy) - { - zcopy_(&n, a, &incx, b, &incy); - } + void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy); }; // If GATHER_INFO is defined, the original function is replaced with a "i" suffix, From 76c085aabf473249546fff0afdb38d47dcc00cf0 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Thu, 10 Oct 2024 19:18:01 +0800 Subject: [PATCH 02/13] Fix compilation bug --- source/Makefile.Objects | 1 + source/module_base/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/Makefile.Objects b/source/Makefile.Objects index d99287243d..9d08c04731 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -121,6 +121,7 @@ OBJS_MAIN=main.o\ OBJS_BASE=abfs-vector3_order.o\ assoc_laguerre.o\ + blas_connector.o\ complexarray.o\ complexmatrix.o\ clebsch_gordan_coeff.o\ diff --git a/source/module_base/CMakeLists.txt b/source/module_base/CMakeLists.txt index 02e219fdf4..e11141208c 100644 --- a/source/module_base/CMakeLists.txt +++ b/source/module_base/CMakeLists.txt @@ -11,6 +11,7 @@ add_library( base OBJECT assoc_laguerre.cpp + blas_connector.cpp clebsch_gordan_coeff.cpp complexarray.cpp complexmatrix.cpp @@ -60,8 +61,6 @@ add_library( ${LIBM_SRC} ) -add_library(blas_connector.cpp) - add_subdirectory(module_container) if(ENABLE_COVERAGE) From 03a4bdaedf11ccf8cb3ac2940dce51d0a2c8a164 Mon Sep 17 
00:00:00 2001 From: Critsium-xy Date: Thu, 10 Oct 2024 19:26:41 +0800 Subject: [PATCH 03/13] Move cblas link part to header file --- source/module_base/blas_connector.cpp | 103 ------------------------- source/module_base/blas_connector.h | 106 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 103 deletions(-) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 6a546bf999..8be747ba08 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -1,108 +1,5 @@ #include "module_base/blas_connector.h" -extern "C" -{ - // level 1: std::vector-std::vector operations, O(n) data and O(n) work. - - // Peize Lin add ?scal 2016-08-04, to compute x=a*x - void sscal_(const int *N, const float *alpha, float *X, const int *incX); - void dscal_(const int *N, const double *alpha, double *X, const int *incX); - void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - - // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y - void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); - void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); - void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - - void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); - void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); - - //reason for passing results as argument instead of returning it: - //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ - // void zdotc_(std::complex *result, const 
int *n, const std::complex *zx, - // const int *incx, const std::complex *zy, const int *incy); - // Peize Lin add ?dot 2017-10-27, to compute d=x*y - float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); - double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); - - // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } - float snrm2_( const int *n, const float *X, const int *incX ); - double dnrm2_( const int *n, const double *X, const int *incX ); - double dznrm2_( const int *n, const std::complex *X, const int *incX ); - - // level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. - void dgemv_(const char*const transa, const int*const m, const int*const n, - const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, - const double*const beta, double*const y, const int*const incy); - - void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - - // A := alpha x * y.T + A - void dger_(const int* m, - const int* n, - const double* alpha, - const double* x, - const int* incx, - const double* y, - const int* incy, - double* a, - const int* lda); - void zgerc_(const int* m, - const int* n, - const std::complex* alpha, - const std::complex* x, - const int* incx, - const std::complex* y, - const int* incy, - 
std::complex* a, - const int* lda); - - // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. - - // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? + b * C - // A is general - void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); - void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - //a is symmetric - void dsymm_(const char *side, const char *uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - //a is hermitian - void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); - - //solving triangular matrix with multiple right hand sides - void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - double* alpha, double* a, int *lda, double*b, int *ldb); - void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); - -} - 
inline void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) { diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index 63ce85414f..a480d04ac9 100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -3,6 +3,112 @@ #include +// These still need to be linked in the header file +// Because quite a lot of code will directly use the original cblas kernels. + +extern "C" +{ + // level 1: std::vector-std::vector operations, O(n) data and O(n) work. + + // Peize Lin add ?scal 2016-08-04, to compute x=a*x + void sscal_(const int *N, const float *alpha, float *X, const int *incX); + void dscal_(const int *N, const double *alpha, double *X, const int *incX); + void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + + // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y + void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); + void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); + void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + + void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); + void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); + + //reason for passing results as argument instead of returning it: + //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ + // void zdotc_(std::complex *result, const int *n, const std::complex *zx, + // const int *incx, const std::complex *zy, const int *incy); + // 
Peize Lin add ?dot 2017-10-27, to compute d=x*y + float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); + double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); + + // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } + float snrm2_( const int *n, const float *X, const int *incX ); + double dnrm2_( const int *n, const double *X, const int *incX ); + double dznrm2_( const int *n, const std::complex *X, const int *incX ); + + // level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. + void dgemv_(const char*const transa, const int*const m, const int*const n, + const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, + const double*const beta, double*const y, const int*const incy); + + void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void dsymv_(const char *uplo, const int *n, + const double *alpha, const double *a, const int *lda, + const double *x, const int *incx, + const double *beta, double *y, const int *incy); + + // A := alpha x * y.T + A + void dger_(const int* m, + const int* n, + const double* alpha, + const double* x, + const int* incx, + const double* y, + const int* incy, + double* a, + const int* lda); + void zgerc_(const int* m, + const int* n, + const std::complex* alpha, + const std::complex* x, + const int* incx, + const std::complex* y, + const int* incy, + std::complex* a, + const int* lda); + + // level 3: matrix-matrix operations, O(n^2) data and O(n^3) 
work. + + // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? + b * C + // A is general + void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, + const float *beta, float *c, const int *ldc); + void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + + //a is symmetric + void dsymm_(const char *side, const char *uplo, const int *m, const int *n, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + //a is hermitian + void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, + std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); + + //solving triangular matrix with multiple right hand sides + void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + double* alpha, double* a, int *lda, double*b, int *ldb); + void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); + +} + // Class BlasConnector provide the connector to fortran lapack routine. 
// The entire function in this class are static and inline function. // Usage example: BlasConnector::functionname(parameter list). From f8937b2271aa89eade70f194120e5d1193017d94 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Thu, 10 Oct 2024 21:26:10 +0800 Subject: [PATCH 04/13] Remove inline keyword --- source/module_base/blas_connector.cpp | 29 +++-------------- source/module_base/blas_connector.h | 46 +++++++++++++-------------- 2 files changed, 28 insertions(+), 47 deletions(-) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 8be747ba08..11dc851e0e 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -1,24 +1,20 @@ -#include "module_base/blas_connector.h" +#include "blas_connector.h" -inline void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) { saxpy_(&n, &alpha, X, &incX, Y, &incY); } -inline void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY) { daxpy_(&n, &alpha, X, &incX, Y, &incY); } -inline void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) { caxpy_(&n, &alpha, X, &incX, Y, &incY); } -inline void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) { zaxpy_(&n, &alpha, X, &incX, Y, &incY); @@ -26,25 +22,21 @@ void BlasConnector::axpy( const int n, const std::complex alpha, const s // x=a*x -inline void BlasConnector::scal( const int n, const float alpha, float *X, const int incX) { sscal_(&n, &alpha, X, &incX); } -inline void BlasConnector::scal( const int n, const double alpha, double *X, const int incX) { dscal_(&n, &alpha, X, &incX); } - -inline + void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) { cscal_(&n, &alpha, X, 
&incX); } -inline void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) { zscal_(&n, &alpha, X, &incX); @@ -52,19 +44,17 @@ void BlasConnector::scal( const int n, const std::complex alpha, std::co // d=x*y -inline float BlasConnector::dot( const int n, const float *X, const int incX, const float *Y, const int incY) { return sdot_(&n, X, &incX, Y, &incY); } -inline + double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY) { return ddot_(&n, X, &incX, Y, &incY); } // C = a * A.? * B.? + b * C -inline void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc) @@ -74,7 +64,6 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons &beta, c, &ldc); } -inline void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc) @@ -84,7 +73,6 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons &beta, c, &ldc); } -inline void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, const std::complex beta, std::complex *c, const int ldc) @@ -94,7 +82,6 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons &beta, c, &ldc); } -inline void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, const std::complex beta, std::complex *c, const int ldc) @@ -104,7 +91,6 @@ void 
BlasConnector::gemm(const char transa, const char transb, const int m, cons &beta, c, &ldc); } -inline void BlasConnector::gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, const double beta, double* Y, const int incy) @@ -112,7 +98,6 @@ void BlasConnector::gemv(const char trans, const int m, const int n, dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } -inline void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy) @@ -120,7 +105,6 @@ void BlasConnector::gemv(const char trans, const int m, const int n, cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } -inline void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy) @@ -130,32 +114,29 @@ void BlasConnector::gemv(const char trans, const int m, const int n, // out = ||x||_2 -inline float BlasConnector::nrm2( const int n, const float *X, const int incX ) { return snrm2_( &n, X, &incX ); } -inline + double BlasConnector::nrm2( const int n, const double *X, const int incX ) { return dnrm2_( &n, X, &incX ); } -inline + double BlasConnector::nrm2( const int n, const std::complex *X, const int incX ) { return dznrm2_( &n, X, &incX ); } // copies a into b -inline void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy) { dcopy_(&n, a, &incx, b, &incy); } -inline void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy) { zcopy_(&n, a, &incx, b, &incy); diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index a480d04ac9..c7cf2ed73e 
100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -118,76 +118,76 @@ class BlasConnector // Peize Lin add 2016-08-04 // y=a*x+y - static inline + static void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY); - static inline + static void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); - static inline + static void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); - static inline + static void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); // Peize Lin add 2016-08-04 // x=a*x - static inline + static void scal( const int n, const float alpha, float *X, const int incX); - static inline + static void scal( const int n, const double alpha, double *X, const int incX); - static inline + static void scal( const int n, const std::complex alpha, std::complex *X, const int incX); - static inline + static void scal( const int n, const std::complex alpha, std::complex *X, const int incX); // Peize Lin add 2017-10-27 // d=x*y - static inline + static float dot( const int n, const float *X, const int incX, const float *Y, const int incY); - static inline + static double dot( const int n, const double *X, const int incX, const double *Y, const int incY); // Peize Lin add 2017-10-27, fix bug trans 2019-01-17 // C = a * A.? * B.? 
+ b * C - static inline + static void gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc); - static inline + static void gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc); - static inline + static void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, const std::complex beta, std::complex *c, const int ldc); - static inline + static void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, const std::complex beta, std::complex *c, const int ldc); - static inline + static void gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, const double beta, double* Y, const int incy); - static inline + static void gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy); - static inline + static void gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy); @@ -195,21 +195,21 @@ class BlasConnector // Peize Lin add 2018-06-12 // out = ||x||_2 - static inline + static float nrm2( const int n, const float *X, const int incX ); - static inline + static double nrm2( const int n, const double *X, const int incX ); - static inline + 
static double nrm2( const int n, const std::complex *X, const int incX ); // copies a into b - static inline + static void copy(const long n, const double *a, const int incx, double *b, const int incy); - static inline + static void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy); }; @@ -258,4 +258,4 @@ void zgemv_i(const char *trans, */ #endif // GATHER_INFO -#endif // BLAS_CONNECTOR_H +#endif // BLAS_CONNECTOR_H \ No newline at end of file From 83326b164d69cd43b64c23fcfa078b2b4253d861 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Fri, 11 Oct 2024 10:27:31 +0800 Subject: [PATCH 05/13] Fix test compilation --- source/module_base/test/CMakeLists.txt | 40 +++++++++---------- .../module_ao/test/CMakeLists.txt | 1 + .../module_pw/test/CMakeLists.txt | 2 +- .../module_xc/test/CMakeLists.txt | 1 + .../hamilt_pwdft/test/CMakeLists.txt | 1 + source/module_md/test/CMakeLists.txt | 1 + .../relax_new/test/CMakeLists.txt | 4 +- 7 files changed, 27 insertions(+), 23 deletions(-) diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 6ada3dec29..0e85616d89 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -18,12 +18,12 @@ AddTest( AddTest( TARGET base_tool_quit LIBS parameter - SOURCES tool_quit_test.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES tool_quit_test.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_tool_check LIBS parameter - SOURCES tool_check_test.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES tool_check_test.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp 
../blas_connector.cpp ) AddTest( TARGET base_memory @@ -32,7 +32,7 @@ AddTest( ADDTest( TARGET base_global_function LIBS parameter ${math_libs} - SOURCES global_function_test.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../memory.cpp ../timer.cpp + SOURCES global_function_test.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_vector3 @@ -42,7 +42,7 @@ AddTest( AddTest( TARGET base_matrix3 LIBS parameter ${math_libs} - SOURCES matrix3_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES matrix3_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_intarray @@ -57,12 +57,12 @@ AddTest( AddTest( TARGET base_matrix LIBS parameter ${math_libs} - SOURCES matrix_test.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES matrix_test.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_complexarray LIBS parameter - SOURCES complexarray_test.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES complexarray_test.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_complexmatrix @@ -82,19 +82,19 @@ AddTest( AddTest( TARGET base_ylmreal LIBS parameter ${math_libs} device - SOURCES math_ylmreal_test.cpp ../math_ylmreal.cpp ../complexmatrix.cpp ../global_variable.cpp ../ylm.cpp 
../realarray.cpp ../timer.cpp ../matrix.cpp ../vector3.h + SOURCES math_ylmreal_test.cpp ../math_ylmreal.cpp ../complexmatrix.cpp ../global_variable.cpp ../ylm.cpp ../realarray.cpp ../timer.cpp ../blas_connector.cpp ../matrix.cpp ../vector3.h ../parallel_reduce.cpp ../parallel_global.cpp ../parallel_comm.cpp ../parallel_common.cpp ../memory.cpp ../libm/branred.cpp ../libm/sincos.cpp ) AddTest( TARGET base_math_sphbes LIBS parameter - SOURCES math_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp + SOURCES math_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_mathzone LIBS parameter ${math_libs} - SOURCES mathzone_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES mathzone_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_mathzone_add1 @@ -104,7 +104,7 @@ AddTest( AddTest( TARGET base_math_polyint LIBS parameter - SOURCES math_polyint_test.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp + SOURCES math_polyint_test.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_gram_schmidt_orth @@ -119,12 +119,12 @@ AddTest( AddTest( TARGET base_inverse_matrix LIBS parameter ${math_libs} - SOURCES inverse_matrix_test.cpp ../inverse_matrix.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp + SOURCES inverse_matrix_test.cpp ../inverse_matrix.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_mymath LIBS parameter - SOURCES mymath_test.cpp ../mymath.cpp ../timer.cpp + SOURCES mymath_test.cpp ../mymath.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_container @@ -135,7 +135,7 @@ AddTest( AddTest( TARGET base_math_chebyshev LIBS parameter ${math_libs} - SOURCES math_chebyshev_test.cpp 
../math_chebyshev.cpp ../tool_quit.cpp ../global_variable.cpp ../timer.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp + SOURCES math_chebyshev_test.cpp ../math_chebyshev.cpp ../tool_quit.cpp ../global_variable.cpp ../timer.cpp ../blas_connector.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ) AddTest( @@ -159,7 +159,7 @@ AddTest( AddTest( TARGET base_ylm LIBS parameter - SOURCES ylm_test.cpp ../ylm.cpp ../timer.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp + SOURCES ylm_test.cpp ../ylm.cpp ../timer.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ) AddTest( @@ -171,13 +171,13 @@ AddTest( AddTest( TARGET base_global_file LIBS parameter - SOURCES global_file_test.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp + SOURCES global_file_test.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_tool_title LIBS parameter - SOURCES tool_title_test.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp + SOURCES tool_title_test.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp ) AddTest( @@ -194,7 +194,7 @@ AddTest( AddTest( TARGET spherical_bessel_transformer - SOURCES spherical_bessel_transformer_test.cpp ../spherical_bessel_transformer.cpp ../math_sphbes.cpp ../math_integral.cpp ../timer.cpp + SOURCES spherical_bessel_transformer_test.cpp ../spherical_bessel_transformer.cpp ../math_sphbes.cpp ../math_integral.cpp ../timer.cpp ../blas_connector.cpp LIBS parameter ${math_libs} ) @@ -206,7 +206,7 @@ AddTest( AddTest( TARGET clebsch_gordan_coeff_test - SOURCES 
clebsch_gordan_coeff_test.cpp ../clebsch_gordan_coeff.cpp ../intarray.cpp ../realarray.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp + SOURCES clebsch_gordan_coeff_test.cpp ../clebsch_gordan_coeff.cpp ../intarray.cpp ../realarray.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ../blas_connector.cpp ../math_ylmreal.cpp ../global_variable.cpp ../ylm.cpp ../timer.cpp ../vector3.h ../parallel_reduce.cpp ../parallel_global.cpp ../parallel_comm.cpp ../parallel_common.cpp ../memory.cpp ../libm/branred.cpp ../libm/sincos.cpp ../inverse_matrix.cpp ../lapack_connector.h LIBS parameter ${math_libs} device @@ -214,7 +214,7 @@ AddTest( AddTest( TARGET assoc_laguerre_test - SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp LIBS parameter ${math_libs} ) @@ -234,6 +234,6 @@ if(ENABLE_GOOGLEBENCH) AddTest( TARGET perf_sphbes LIBS parameter - SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp + SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ../blas_connector.cpp ) endif() diff --git a/source/module_basis/module_ao/test/CMakeLists.txt b/source/module_basis/module_ao/test/CMakeLists.txt index db9c648b12..504f59ded2 100644 --- a/source/module_basis/module_ao/test/CMakeLists.txt +++ b/source/module_basis/module_ao/test/CMakeLists.txt @@ -18,6 +18,7 @@ list(APPEND depend_files ../../../module_base/tool_quit.cpp ../../../module_base/tool_check.cpp ../../../module_base/timer.cpp + ../../../module_base/blas_connector.cpp ../../../module_base/mathzone_add1.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_function.cpp diff --git a/source/module_basis/module_pw/test/CMakeLists.txt b/source/module_basis/module_pw/test/CMakeLists.txt index 
1306a5f7ba..e1ce122d07 100644 --- a/source/module_basis/module_pw/test/CMakeLists.txt +++ b/source/module_basis/module_pw/test/CMakeLists.txt @@ -3,7 +3,7 @@ AddTest( TARGET pw_test LIBS parameter ${math_libs} planewave device SOURCES ../../../module_base/matrix.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix3.cpp ../../../module_base/tool_quit.cpp - ../../../module_base/mymath.cpp ../../../module_base/timer.cpp ../../../module_base/memory.cpp + ../../../module_base/mymath.cpp ../../../module_base/timer.cpp ../../../module_base/memory.cpp ../../../module_base/blas_connector.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp # ../../../module_psi/kernels/psi_memory_op.cpp ../../../module_base/module_device/memory_op.cpp diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index 46ae57e9ef..ce4c0796a9 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -69,6 +69,7 @@ AddTest( ../../../module_base/matrix.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp + ../../../module_base/blas_connector.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp ) \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt index ed60c0ec81..c1b56517b7 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt +++ b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt @@ -15,6 +15,7 @@ AddTest( ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp + ../../../module_base/blas_connector.cpp ../../../module_base/parallel_global.cpp ../../../module_base/parallel_comm.cpp ../../../module_base/parallel_common.cpp diff --git a/source/module_md/test/CMakeLists.txt 
b/source/module_md/test/CMakeLists.txt index 64e21fc6f6..e92f8cc369 100644 --- a/source/module_md/test/CMakeLists.txt +++ b/source/module_md/test/CMakeLists.txt @@ -19,6 +19,7 @@ list(APPEND depend_files ../../module_base/matrix3.cpp ../../module_base/matrix.cpp ../../module_base/timer.cpp + ../../module_base/blas_connector.cpp ../../module_base/memory.cpp ../../module_base/global_variable.cpp ../../module_base/global_function.cpp diff --git a/source/module_relax/relax_new/test/CMakeLists.txt b/source/module_relax/relax_new/test/CMakeLists.txt index 2416e195a7..c6ec3d0735 100644 --- a/source/module_relax/relax_new/test/CMakeLists.txt +++ b/source/module_relax/relax_new/test/CMakeLists.txt @@ -9,12 +9,12 @@ install(DIRECTORY support DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) AddTest( TARGET relax_new_line_search LIBS parameter - SOURCES line_search_test.cpp ../line_search.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/global_function.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/tool_quit.cpp + SOURCES line_search_test.cpp ../line_search.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/global_function.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/blas_connector.cpp ../../../module_base/tool_quit.cpp ) AddTest( TARGET relax_new_relax - SOURCES relax_test.cpp ../relax.cpp ../line_search.cpp ../../../module_base/tool_quit.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp + SOURCES relax_test.cpp ../relax.cpp ../line_search.cpp ../../../module_base/tool_quit.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/blas_connector.cpp ../../../module_base/matrix3.cpp 
../../../module_base/intarray.cpp ../../../module_base/tool_title.cpp ../../../module_base/global_function.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix.cpp ../../../module_base/complexarray.cpp ../../../module_base/tool_quit.cpp ../../../module_base/realarray.cpp From ef3cb173fb396c3c191ab94dcd78390c89429715 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Fri, 11 Oct 2024 11:11:23 +0800 Subject: [PATCH 06/13] Fix --- source/module_base/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 0e85616d89..42128fc001 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -3,7 +3,7 @@ install(DIRECTORY data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) AddTest( TARGET base_blas_connector LIBS parameter ${math_libs} - SOURCES blas_connector_test.cpp + SOURCES blas_connector_test.cpp ../blas_connector.cpp ) AddTest( TARGET base_atom_in From 2a59b001f461192d7e91ca398458946537f0ddd0 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Fri, 11 Oct 2024 11:57:36 +0800 Subject: [PATCH 07/13] Fix --- source/module_base/blas_connector.cpp | 103 ++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 11dc851e0e..f3b47ab68f 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -1,5 +1,108 @@ #include "blas_connector.h" +extern "C" +{ + // level 1: std::vector-std::vector operations, O(n) data and O(n) work. 
+ + // Peize Lin add ?scal 2016-08-04, to compute x=a*x + void sscal_(const int *N, const float *alpha, float *X, const int *incX); + void dscal_(const int *N, const double *alpha, double *X, const int *incX); + void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + + // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y + void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); + void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); + void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + + void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); + void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); + + //reason for passing results as argument instead of returning it: + //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ + // void zdotc_(std::complex *result, const int *n, const std::complex *zx, + // const int *incx, const std::complex *zy, const int *incy); + // Peize Lin add ?dot 2017-10-27, to compute d=x*y + float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); + double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); + + // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } + float snrm2_( const int *n, const float *X, const int *incX ); + double dnrm2_( const int *n, const double *X, const int *incX ); + double dznrm2_( const int *n, const std::complex *X, const int *incX ); + + // level 2: matrix-std::vector operations, 
O(n^2) data and O(n^2) work. + void dgemv_(const char*const transa, const int*const m, const int*const n, + const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, + const double*const beta, double*const y, const int*const incy); + + void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, + const std::complex *a, const int *lda, const std::complex *x, const int *incx, + const std::complex *beta, std::complex *y, const int *incy); + + void dsymv_(const char *uplo, const int *n, + const double *alpha, const double *a, const int *lda, + const double *x, const int *incx, + const double *beta, double *y, const int *incy); + + // A := alpha x * y.T + A + void dger_(const int* m, + const int* n, + const double* alpha, + const double* x, + const int* incx, + const double* y, + const int* incy, + double* a, + const int* lda); + void zgerc_(const int* m, + const int* n, + const std::complex* alpha, + const std::complex* x, + const int* incx, + const std::complex* y, + const int* incy, + std::complex* a, + const int* lda); + + // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. + + // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? 
+ b * C + // A is general + void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, + const float *beta, float *c, const int *ldc); + void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, + const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, + const std::complex *beta, std::complex *c, const int *ldc); + + //a is symmetric + void dsymm_(const char *side, const char *uplo, const int *m, const int *n, + const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, + const double *beta, double *c, const int *ldc); + //a is hermitian + void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, + std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); + + //solving triangular matrix with multiple right hand sides + void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + double* alpha, double* a, int *lda, double*b, int *ldb); + void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, + std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); + +} + void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) { saxpy_(&n, &alpha, X, &incX, Y, &incY); From 
487b88c66271bf08182268b41fa549ce922511c4 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Fri, 11 Oct 2024 14:46:10 +0800 Subject: [PATCH 08/13] Fix library sequence --- source/module_base/test/CMakeLists.txt | 64 +++++++++---------- .../module_ao/test/CMakeLists.txt | 2 +- .../module_xc/test/CMakeLists.txt | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 42128fc001..76e42695b5 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -18,12 +18,12 @@ AddTest( AddTest( TARGET base_tool_quit LIBS parameter - SOURCES tool_quit_test.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES tool_quit_test.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_tool_check LIBS parameter - SOURCES tool_check_test.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES tool_check_test.cpp ../blas_connector.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_memory @@ -32,7 +32,7 @@ AddTest( ADDTest( TARGET base_global_function LIBS parameter ${math_libs} - SOURCES global_function_test.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES global_function_test.cpp ../blas_connector.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_vector3 @@ -42,7 +42,7 @@ AddTest( AddTest( TARGET base_matrix3 LIBS parameter ${math_libs} - SOURCES matrix3_test.cpp ../matrix3.cpp 
../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES matrix3_test.cpp ../blas_connector.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_intarray @@ -57,59 +57,59 @@ AddTest( AddTest( TARGET base_matrix LIBS parameter ${math_libs} - SOURCES matrix_test.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES matrix_test.cpp ../blas_connector.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_complexarray LIBS parameter - SOURCES complexarray_test.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES complexarray_test.cpp ../blas_connector.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_complexmatrix LIBS parameter ${math_libs} - SOURCES complexmatrix_test.cpp ../complexmatrix.cpp ../matrix.cpp + SOURCES complexmatrix_test.cpp ../blas_connector.cpp ../complexmatrix.cpp ../matrix.cpp ) AddTest( TARGET base_integral LIBS parameter - SOURCES math_integral_test.cpp ../math_integral.cpp + SOURCES math_integral_test.cpp ../blas_connector.cpp ../math_integral.cpp ) AddTest( TARGET base_sph_bessel_recursive LIBS parameter - SOURCES sph_bessel_recursive_test.cpp ../sph_bessel_recursive-d1.cpp ../sph_bessel_recursive-d2.cpp + SOURCES sph_bessel_recursive_test.cpp ../blas_connector.cpp ../sph_bessel_recursive-d1.cpp ../sph_bessel_recursive-d2.cpp ) AddTest( TARGET base_ylmreal LIBS parameter ${math_libs} device - SOURCES math_ylmreal_test.cpp 
../math_ylmreal.cpp ../complexmatrix.cpp ../global_variable.cpp ../ylm.cpp ../realarray.cpp ../timer.cpp ../blas_connector.cpp ../matrix.cpp ../vector3.h + SOURCES math_ylmreal_test.cpp ../blas_connector.cpp ../math_ylmreal.cpp ../complexmatrix.cpp ../global_variable.cpp ../ylm.cpp ../realarray.cpp ../timer.cpp ../matrix.cpp ../vector3.h ../parallel_reduce.cpp ../parallel_global.cpp ../parallel_comm.cpp ../parallel_common.cpp ../memory.cpp ../libm/branred.cpp ../libm/sincos.cpp ) AddTest( TARGET base_math_sphbes LIBS parameter - SOURCES math_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ../blas_connector.cpp + SOURCES math_sphbes_test.cpp ../blas_connector.cpp ../math_sphbes.cpp ../timer.cpp ) AddTest( TARGET base_mathzone LIBS parameter ${math_libs} - SOURCES mathzone_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES mathzone_test.cpp ../blas_connector.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_mathzone_add1 LIBS parameter ${math_libs} - SOURCES mathzone_add1_test.cpp ../mathzone_add1.cpp ../math_sphbes.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES mathzone_add1_test.cpp ../blas_connector.cpp ../mathzone_add1.cpp ../math_sphbes.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_math_polyint LIBS parameter - SOURCES math_polyint_test.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp ../blas_connector.cpp + SOURCES math_polyint_test.cpp ../blas_connector.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp ) AddTest( TARGET base_gram_schmidt_orth LIBS parameter ${math_libs} - SOURCES 
gram_schmidt_orth_test.cpp ../gram_schmidt_orth.h ../gram_schmidt_orth-inl.h ../global_function.h ../math_integral.cpp + SOURCES gram_schmidt_orth_test.cpp ../blas_connector.cpp ../gram_schmidt_orth.h ../gram_schmidt_orth-inl.h ../global_function.h ../math_integral.cpp ) AddTest( TARGET base_math_bspline @@ -119,41 +119,41 @@ AddTest( AddTest( TARGET base_inverse_matrix LIBS parameter ${math_libs} - SOURCES inverse_matrix_test.cpp ../inverse_matrix.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ../blas_connector.cpp + SOURCES inverse_matrix_test.cpp ../blas_connector.cpp ../inverse_matrix.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ) AddTest( TARGET base_mymath LIBS parameter - SOURCES mymath_test.cpp ../mymath.cpp ../timer.cpp ../blas_connector.cpp + SOURCES mymath_test.cpp ../blas_connector.cpp ../mymath.cpp ../timer.cpp ) AddTest( TARGET base_container LIBS parameter - SOURCES container_operator_test.cpp ../container_operator.h + SOURCES container_operator_test.cpp ../blas_connector.cpp ../container_operator.h ) AddTest( TARGET base_math_chebyshev LIBS parameter ${math_libs} - SOURCES math_chebyshev_test.cpp ../math_chebyshev.cpp ../tool_quit.cpp ../global_variable.cpp ../timer.cpp ../blas_connector.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp + SOURCES math_chebyshev_test.cpp ../blas_connector.cpp ../math_chebyshev.cpp ../tool_quit.cpp ../global_variable.cpp ../timer.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ) AddTest( TARGET base_lapack_connector LIBS parameter ${math_libs} - SOURCES lapack_connector_test.cpp ../lapack_connector.h + SOURCES lapack_connector_test.cpp ../blas_connector.cpp ../lapack_connector.h ) AddTest( TARGET base_opt_CG LIBS parameter ${math_libs} - SOURCES opt_CG_test.cpp opt_test_tools.cpp ../opt_CG.cpp ../opt_DCsrch.cpp ../global_variable.cpp ../parallel_reduce.cpp + SOURCES opt_CG_test.cpp opt_test_tools.cpp ../blas_connector.cpp ../opt_CG.cpp ../opt_DCsrch.cpp ../global_variable.cpp 
../parallel_reduce.cpp ) AddTest( TARGET base_opt_TN LIBS parameter ${math_libs} - SOURCES opt_TN_test.cpp opt_test_tools.cpp ../opt_CG.cpp ../opt_DCsrch.cpp ../global_variable.cpp ../parallel_reduce.cpp + SOURCES opt_TN_test.cpp opt_test_tools.cpp ../blas_connector.cpp ../opt_CG.cpp ../opt_DCsrch.cpp ../global_variable.cpp ../parallel_reduce.cpp ) AddTest( @@ -165,48 +165,48 @@ AddTest( AddTest( TARGET base_abfs-vector3_order LIBS parameter - SOURCES abfs-vector3_order_test.cpp ../abfs-vector3_order.cpp + SOURCES abfs-vector3_order_test.cpp ../blas_connector.cpp ../abfs-vector3_order.cpp ) AddTest( TARGET base_global_file LIBS parameter - SOURCES global_file_test.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES global_file_test.cpp ../blas_connector.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_tool_title LIBS parameter - SOURCES tool_title_test.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp + SOURCES tool_title_test.cpp ../blas_connector.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp ) AddTest( TARGET base_element_basis_index LIBS parameter - SOURCES element_basis_index_test.cpp ../element_basis_index.cpp + SOURCES element_basis_index_test.cpp ../blas_connector.cpp ../element_basis_index.cpp ) AddTest( TARGET base_tool_threading LIBS parameter - SOURCES tool_threading_test.cpp ../tool_threading.h + SOURCES tool_threading_test.cpp ../blas_connector.cpp ../tool_threading.h ) AddTest( TARGET spherical_bessel_transformer - SOURCES spherical_bessel_transformer_test.cpp ../spherical_bessel_transformer.cpp ../math_sphbes.cpp ../math_integral.cpp ../timer.cpp ../blas_connector.cpp + SOURCES 
spherical_bessel_transformer_test.cpp ../blas_connector.cpp ../spherical_bessel_transformer.cpp ../math_sphbes.cpp ../math_integral.cpp ../timer.cpp LIBS parameter ${math_libs} ) AddTest( TARGET cubic_spline - SOURCES cubic_spline_test.cpp ../cubic_spline.cpp + SOURCES cubic_spline_test.cpp ../blas_connector.cpp ../cubic_spline.cpp LIBS parameter ${math_libs} ) AddTest( TARGET clebsch_gordan_coeff_test - SOURCES clebsch_gordan_coeff_test.cpp ../clebsch_gordan_coeff.cpp ../intarray.cpp ../realarray.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ../blas_connector.cpp + SOURCES clebsch_gordan_coeff_test.cpp ../blas_connector.cpp ../clebsch_gordan_coeff.cpp ../intarray.cpp ../realarray.cpp ../complexmatrix.cpp ../matrix.cpp ../timer.cpp ../math_ylmreal.cpp ../global_variable.cpp ../ylm.cpp ../timer.cpp ../vector3.h ../parallel_reduce.cpp ../parallel_global.cpp ../parallel_comm.cpp ../parallel_common.cpp ../memory.cpp ../libm/branred.cpp ../libm/sincos.cpp ../inverse_matrix.cpp ../lapack_connector.h LIBS parameter ${math_libs} device @@ -214,7 +214,7 @@ AddTest( AddTest( TARGET assoc_laguerre_test - SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp + SOURCES assoc_laguerre_test.cpp ../blas_connector.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp LIBS parameter ${math_libs} ) @@ -234,6 +234,6 @@ if(ENABLE_GOOGLEBENCH) AddTest( TARGET perf_sphbes LIBS parameter - SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ../blas_connector.cpp + SOURCES perf_sphbes_test.cpp ../blas_connector.cpp ../math_sphbes.cpp ../timer.cpp ) endif() diff --git a/source/module_basis/module_ao/test/CMakeLists.txt b/source/module_basis/module_ao/test/CMakeLists.txt index 504f59ded2..c1708eef85 100644 --- a/source/module_basis/module_ao/test/CMakeLists.txt +++ 
b/source/module_basis/module_ao/test/CMakeLists.txt @@ -7,6 +7,7 @@ list(APPEND depend_files ../../../module_base/math_ylmreal.cpp ../../../module_base/ylm.cpp ../../../module_base/memory.cpp + ../../../module_base/blas_connector.cpp ../../../module_base/complexarray.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix.cpp @@ -18,7 +19,6 @@ list(APPEND depend_files ../../../module_base/tool_quit.cpp ../../../module_base/tool_check.cpp ../../../module_base/timer.cpp - ../../../module_base/blas_connector.cpp ../../../module_base/mathzone_add1.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_function.cpp diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index ce4c0796a9..d00cd3b19c 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -66,10 +66,10 @@ AddTest( ../xc_functional_vxc.cpp ../xc_functional_libxc_vxc.cpp ../xc_functional_libxc_tools.cpp + ../../../module_base/blas_connector.cpp ../../../module_base/matrix.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp - ../../../module_base/blas_connector.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp ) \ No newline at end of file From 15fe6a4b421a5bd9d082f0b389552a9cca0e630b Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Fri, 11 Oct 2024 14:59:50 +0800 Subject: [PATCH 09/13] Optimize link usage --- source/module_base/blas_connector.cpp | 103 -------------------------- 1 file changed, 103 deletions(-) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index f3b47ab68f..11dc851e0e 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -1,108 +1,5 @@ #include "blas_connector.h" -extern "C" -{ - // level 1: std::vector-std::vector operations, O(n) data and O(n) work. 
- - // Peize Lin add ?scal 2016-08-04, to compute x=a*x - void sscal_(const int *N, const float *alpha, float *X, const int *incX); - void dscal_(const int *N, const double *alpha, double *X, const int *incX); - void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - - // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y - void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); - void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); - void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - - void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); - void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); - - //reason for passing results as argument instead of returning it: - //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ - // void zdotc_(std::complex *result, const int *n, const std::complex *zx, - // const int *incx, const std::complex *zy, const int *incy); - // Peize Lin add ?dot 2017-10-27, to compute d=x*y - float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); - double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); - - // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } - float snrm2_( const int *n, const float *X, const int *incX ); - double dnrm2_( const int *n, const double *X, const int *incX ); - double dznrm2_( const int *n, const std::complex *X, const int *incX ); - - // level 2: matrix-std::vector operations, 
O(n^2) data and O(n^2) work. - void dgemv_(const char*const transa, const int*const m, const int*const n, - const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, - const double*const beta, double*const y, const int*const incy); - - void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - - // A := alpha x * y.T + A - void dger_(const int* m, - const int* n, - const double* alpha, - const double* x, - const int* incx, - const double* y, - const int* incy, - double* a, - const int* lda); - void zgerc_(const int* m, - const int* n, - const std::complex* alpha, - const std::complex* x, - const int* incx, - const std::complex* y, - const int* incy, - std::complex* a, - const int* lda); - - // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. - - // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? 
+ b * C - // A is general - void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); - void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - //a is symmetric - void dsymm_(const char *side, const char *uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - //a is hermitian - void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); - - //solving triangular matrix with multiple right hand sides - void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - double* alpha, double* a, int *lda, double*b, int *ldb); - void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); - -} - void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) { saxpy_(&n, &alpha, X, &incX, Y, &incY); From 
948056b808041d9c780728d85c73d4dc8e3e638a Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Tue, 15 Oct 2024 17:04:57 +0800 Subject: [PATCH 10/13] Fix test building error --- source/module_base/test/CMakeLists.txt | 36 +++++++++---------- .../module_pw/kernels/test/CMakeLists.txt | 2 +- .../kernels/test/CMakeLists.txt | 2 +- .../relax_new/test/CMakeLists.txt | 6 ++-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 76e42695b5..86e05d9478 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -18,12 +18,12 @@ AddTest( AddTest( TARGET base_tool_quit LIBS parameter - SOURCES tool_quit_test.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES tool_quit_test.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_tool_check LIBS parameter - SOURCES tool_check_test.cpp ../blas_connector.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES tool_check_test.cpp ../tool_check.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_memory @@ -42,7 +42,7 @@ AddTest( AddTest( TARGET base_matrix3 LIBS parameter ${math_libs} - SOURCES matrix3_test.cpp ../blas_connector.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES matrix3_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_intarray @@ -62,7 +62,7 @@ AddTest( AddTest( TARGET base_complexarray LIBS parameter - 
SOURCES complexarray_test.cpp ../blas_connector.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES complexarray_test.cpp ../complexarray.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_complexmatrix @@ -72,12 +72,12 @@ AddTest( AddTest( TARGET base_integral LIBS parameter - SOURCES math_integral_test.cpp ../blas_connector.cpp ../math_integral.cpp + SOURCES math_integral_test.cpp ../math_integral.cpp ) AddTest( TARGET base_sph_bessel_recursive LIBS parameter - SOURCES sph_bessel_recursive_test.cpp ../blas_connector.cpp ../sph_bessel_recursive-d1.cpp ../sph_bessel_recursive-d2.cpp + SOURCES sph_bessel_recursive_test.cpp ../sph_bessel_recursive-d1.cpp ../sph_bessel_recursive-d2.cpp ) AddTest( TARGET base_ylmreal @@ -89,12 +89,12 @@ AddTest( AddTest( TARGET base_math_sphbes LIBS parameter - SOURCES math_sphbes_test.cpp ../blas_connector.cpp ../math_sphbes.cpp ../timer.cpp + SOURCES math_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ) AddTest( TARGET base_mathzone LIBS parameter ${math_libs} - SOURCES mathzone_test.cpp ../blas_connector.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp + SOURCES mathzone_test.cpp ../matrix3.cpp ../matrix.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp ../blas_connector.cpp ) AddTest( TARGET base_mathzone_add1 @@ -104,7 +104,7 @@ AddTest( AddTest( TARGET base_math_polyint LIBS parameter - SOURCES math_polyint_test.cpp ../blas_connector.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp + SOURCES math_polyint_test.cpp ../math_polyint.cpp ../realarray.cpp ../timer.cpp ) AddTest( TARGET base_gram_schmidt_orth @@ -124,12 +124,12 @@ AddTest( AddTest( TARGET base_mymath LIBS parameter - SOURCES 
mymath_test.cpp ../blas_connector.cpp ../mymath.cpp ../timer.cpp + SOURCES mymath_test.cpp ../mymath.cpp ../timer.cpp ) AddTest( TARGET base_container LIBS parameter - SOURCES container_operator_test.cpp ../blas_connector.cpp ../container_operator.h + SOURCES container_operator_test.cpp ../container_operator.h ) AddTest( @@ -159,37 +159,37 @@ AddTest( AddTest( TARGET base_ylm LIBS parameter - SOURCES ylm_test.cpp ../ylm.cpp ../timer.cpp ../blas_connector.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp + SOURCES ylm_test.cpp ../ylm.cpp ../timer.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ) AddTest( TARGET base_abfs-vector3_order LIBS parameter - SOURCES abfs-vector3_order_test.cpp ../blas_connector.cpp ../abfs-vector3_order.cpp + SOURCES abfs-vector3_order_test.cpp ../abfs-vector3_order.cpp ) AddTest( TARGET base_global_file LIBS parameter - SOURCES global_file_test.cpp ../blas_connector.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp + SOURCES global_file_test.cpp ../global_file.cpp ../global_function.cpp ../tool_quit.cpp ../global_variable.cpp ../memory.cpp ../timer.cpp ) AddTest( TARGET base_tool_title LIBS parameter - SOURCES tool_title_test.cpp ../blas_connector.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp + SOURCES tool_title_test.cpp ../tool_title.cpp ../global_variable.cpp ../global_function.cpp ../timer.cpp ../tool_quit.cpp ../global_file.cpp ../memory.cpp ) AddTest( TARGET base_element_basis_index LIBS parameter - SOURCES element_basis_index_test.cpp ../blas_connector.cpp ../element_basis_index.cpp + SOURCES element_basis_index_test.cpp ../element_basis_index.cpp ) AddTest( TARGET base_tool_threading LIBS parameter - SOURCES tool_threading_test.cpp ../blas_connector.cpp ../tool_threading.h + SOURCES 
tool_threading_test.cpp ../tool_threading.h ) AddTest( @@ -234,6 +234,6 @@ if(ENABLE_GOOGLEBENCH) AddTest( TARGET perf_sphbes LIBS parameter - SOURCES perf_sphbes_test.cpp ../blas_connector.cpp ../math_sphbes.cpp ../timer.cpp + SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp ) endif() diff --git a/source/module_basis/module_pw/kernels/test/CMakeLists.txt b/source/module_basis/module_pw/kernels/test/CMakeLists.txt index 48188f269a..c190ded73d 100644 --- a/source/module_basis/module_pw/kernels/test/CMakeLists.txt +++ b/source/module_basis/module_pw/kernels/test/CMakeLists.txt @@ -8,5 +8,5 @@ AddTest( ../../../../module_base/parallel_global.cpp ../../../../module_base/parallel_reduce.cpp ../../../../module_base/parallel_comm.cpp ../../../../module_base/complexmatrix.cpp ../../../../module_base/matrix.cpp ../../../../module_base/memory.cpp - ../../../../module_base/libm/branred.cpp ../../../../module_base/libm/sincos.cpp + ../../../../module_base/libm/branred.cpp ../../../../module_base/libm/sincos.cpp ../../../../module_base/blas_connector.cpp ) \ No newline at end of file diff --git a/source/module_hsolver/kernels/test/CMakeLists.txt b/source/module_hsolver/kernels/test/CMakeLists.txt index 74d73364ce..c8d1f2cdd9 100644 --- a/source/module_hsolver/kernels/test/CMakeLists.txt +++ b/source/module_hsolver/kernels/test/CMakeLists.txt @@ -11,7 +11,7 @@ elseif() AddTest( TARGET Hsolver_Kernels_UTs LIBS parameter ${math_libs} base device - SOURCES math_kernel_test.cpp + SOURCES math_kernel_test.cpp ../../../module_base/blas_connector.cpp ) endif() diff --git a/source/module_relax/relax_new/test/CMakeLists.txt b/source/module_relax/relax_new/test/CMakeLists.txt index c6ec3d0735..1835f0eca3 100644 --- a/source/module_relax/relax_new/test/CMakeLists.txt +++ b/source/module_relax/relax_new/test/CMakeLists.txt @@ -9,14 +9,14 @@ install(DIRECTORY support DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) AddTest( TARGET relax_new_line_search LIBS parameter - SOURCES 
line_search_test.cpp ../line_search.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/global_function.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/blas_connector.cpp ../../../module_base/tool_quit.cpp + SOURCES line_search_test.cpp ../line_search.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/global_function.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/tool_quit.cpp ) AddTest( TARGET relax_new_relax - SOURCES relax_test.cpp ../relax.cpp ../line_search.cpp ../../../module_base/tool_quit.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/blas_connector.cpp + SOURCES relax_test.cpp ../relax.cpp ../line_search.cpp ../../../module_base/tool_quit.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/matrix3.cpp ../../../module_base/intarray.cpp ../../../module_base/tool_title.cpp ../../../module_base/global_function.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix.cpp - ../../../module_base/complexarray.cpp ../../../module_base/tool_quit.cpp ../../../module_base/realarray.cpp + ../../../module_base/complexarray.cpp ../../../module_base/tool_quit.cpp ../../../module_base/realarray.cpp ../../../module_base/blas_connector.cpp LIBS parameter ${math_libs} ) \ No newline at end of file From 051247159673318b6affd2db384c5e8dd46b271e Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Tue, 15 Oct 2024 20:14:12 +0800 Subject: [PATCH 11/13] Fix limxc build failing --- source/module_hamilt_general/module_xc/test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt 
b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index d00cd3b19c..7466f40a92 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -37,6 +37,7 @@ AddTest( ../../../module_base/memory.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp + ../../../module_base/blas_connector.cpp ) AddTest( From fe5dad69b7af6e1f9cfad3663d5c78b0677cd008 Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Wed, 16 Oct 2024 13:06:51 +0800 Subject: [PATCH 12/13] Add device_type flag --- source/module_base/blas_connector.cpp | 114 +++++++++++++++----------- source/module_base/blas_connector.h | 45 +++++----- 2 files changed, 91 insertions(+), 68 deletions(-) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 11dc851e0e..8f954889bb 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -1,143 +1,165 @@ #include "blas_connector.h" -void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY) +void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type) { - saxpy_(&n, &alpha, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + saxpy_(&n, &alpha, X, &incX, Y, &incY); } -void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY) +void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type) { - daxpy_(&n, &alpha, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + daxpy_(&n, &alpha, X, &incX, Y, &incY); } -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex 
*Y, const int incY) +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) { - caxpy_(&n, &alpha, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + caxpy_(&n, &alpha, X, &incX, Y, &incY); } -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY) +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) { - zaxpy_(&n, &alpha, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + zaxpy_(&n, &alpha, X, &incX, Y, &incY); } // x=a*x -void BlasConnector::scal( const int n, const float alpha, float *X, const int incX) +void BlasConnector::scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type) { - sscal_(&n, &alpha, X, &incX); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + sscal_(&n, &alpha, X, &incX); } -void BlasConnector::scal( const int n, const double alpha, double *X, const int incX) +void BlasConnector::scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type) { - dscal_(&n, &alpha, X, &incX); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + dscal_(&n, &alpha, X, &incX); } -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) +void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) { - cscal_(&n, &alpha, X, &incX); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + cscal_(&n, &alpha, X, &incX); } -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX) +void 
BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) { - zscal_(&n, &alpha, X, &incX); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + zscal_(&n, &alpha, X, &incX); } // d=x*y -float BlasConnector::dot( const int n, const float *X, const int incX, const float *Y, const int incY) +float BlasConnector::dot( const int n, const float *X, const int incX, const float *Y, const int incY, base_device::AbacusDevice_t device_type) { - return sdot_(&n, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + return sdot_(&n, X, &incX, Y, &incY); } -double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY) +double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type) { - return ddot_(&n, X, &incX, Y, &incY); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + return ddot_(&n, X, &incX, Y, &incY); } // C = a * A.? * B.? 
+ b * C void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc) + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) { - sgemm_(&transb, &transa, &n, &m, &k, + if (device_type == base_device::AbacusDevice_t::CpuDevice) + sgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc) + const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) { - dgemm_(&transb, &transa, &n, &m, &k, + if (device_type == base_device::AbacusDevice_t::CpuDevice) + dgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc) + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) { - cgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + cgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); } void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc) + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) { - zgemm_(&transb, 
&transa, &n, &m, &k, + if (device_type == base_device::AbacusDevice_t::CpuDevice) + zgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } void BlasConnector::gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy) + const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type) { - dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy) + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) { - cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy) + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) { - zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } // out = ||x||_2 -float BlasConnector::nrm2( const int n, const float *X, const int incX ) +float BlasConnector::nrm2( const int n, const float *X, const int incX, base_device::AbacusDevice_t device_type ) { - return snrm2_( &n, X, &incX ); + if (device_type == 
base_device::AbacusDevice_t::CpuDevice) + return snrm2_( &n, X, &incX ); } -double BlasConnector::nrm2( const int n, const double *X, const int incX ) +double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) { - return dnrm2_( &n, X, &incX ); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + return dnrm2_( &n, X, &incX ); } -double BlasConnector::nrm2( const int n, const std::complex *X, const int incX ) +double BlasConnector::nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type ) { - return dznrm2_( &n, X, &incX ); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + return dznrm2_( &n, X, &incX ); } // copies a into b -void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy) +void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) { - dcopy_(&n, a, &incx, b, &incy); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + dcopy_(&n, a, &incx, b, &incy); } -void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy) +void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) { - zcopy_(&n, a, &incx, b, &incy); + if (device_type == base_device::AbacusDevice_t::CpuDevice) + zcopy_(&n, a, &incx, b, &incy); } \ No newline at end of file diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index c7cf2ed73e..b819b6852e 100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -2,6 +2,7 @@ #define BLAS_CONNECTOR_H #include +#include "module_base/module_device/types.h" // These still need to be linked in the header file // Because quite a lot of code will directly use the original cblas kernels. 
@@ -119,40 +120,40 @@ class BlasConnector // Peize Lin add 2016-08-04 // y=a*x+y static - void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY); + void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); + void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); + void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY); + void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); // Peize Lin add 2016-08-04 // x=a*x static - void scal( const int n, const float alpha, float *X, const int incX); + void scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void scal( const int n, const double alpha, double *X, const int incX); + void scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void scal( const int n, const std::complex alpha, std::complex *X, const int incX); + void scal( const int n, const 
std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void scal( const int n, const std::complex alpha, std::complex *X, const int incX); + void scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); // Peize Lin add 2017-10-27 // d=x*y static - float dot( const int n, const float *X, const int incX, const float *Y, const int incY); + float dot( const int n, const float *X, const int incX, const float *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - double dot( const int n, const double *X, const int incX, const double *Y, const int incY); + double dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); // Peize Lin add 2017-10-27, fix bug trans 2019-01-17 @@ -160,57 +161,57 @@ class BlasConnector static void gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc); + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc); + const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex 
beta, std::complex *c, const int ldc); + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc); + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy); + const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy); + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static void gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy); + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); // Peize Lin add 2018-06-12 // out = ||x||_2 static - float nrm2( const int n, const float *X, const int incX ); + float nrm2( const int n, const float *X, const int, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); static - double nrm2( const int n, const double *X, const int incX ); + double nrm2( const int n, const 
double *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); static - double nrm2( const int n, const std::complex *X, const int incX ); + double nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); // copies a into b static - void copy(const long n, const double *a, const int incx, double *b, const int incy); + void copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy); + void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); }; // If GATHER_INFO is defined, the original function is replaced with a "i" suffix, From dad1a119c823ba8e995b2d578e1450d5db510662 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:28:01 +0000 Subject: [PATCH 13/13] [pre-commit.ci lite] apply automatic fixes --- source/module_base/blas_connector.cpp | 66 ++++++++++++++++++--------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 8f954889bb..8da2b802fa 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -2,164 +2,186 @@ void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { saxpy_(&n, &alpha, X, &incX, Y, &incY); } +} void BlasConnector::axpy( const int n, const 
double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { daxpy_(&n, &alpha, X, &incX, Y, &incY); } +} void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { caxpy_(&n, &alpha, X, &incX, Y, &incY); } +} void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { zaxpy_(&n, &alpha, X, &incX, Y, &incY); } +} // x=a*x void BlasConnector::scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { sscal_(&n, &alpha, X, &incX); } +} void BlasConnector::scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { dscal_(&n, &alpha, X, &incX); } +} void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { cscal_(&n, &alpha, X, &incX); } +} void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t 
device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { zscal_(&n, &alpha, X, &incX); } +} // d=x*y float BlasConnector::dot( const int n, const float *X, const int incX, const float *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { return sdot_(&n, X, &incX, Y, &incY); } +} double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { return ddot_(&n, X, &incX, Y, &incY); } +} // C = a * A.? * B.? + b * C void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { sgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } +} void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { dgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } +} void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const 
int ldb, const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { cgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } +} void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { zgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); } +} void BlasConnector::gemv(const char trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* X, const int incx, const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } +} void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } +} void BlasConnector::gemv(const char trans, const int m, const int n, const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t 
device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); } +} // out = ||x||_2 float BlasConnector::nrm2( const int n, const float *X, const int incX, base_device::AbacusDevice_t device_type ) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { return snrm2_( &n, X, &incX ); } +} double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { return dnrm2_( &n, X, &incX ); } +} double BlasConnector::nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type ) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { return dznrm2_( &n, X, &incX ); } +} // copies a into b void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { dcopy_(&n, a, &incx, b, &incy); } +} void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) + if (device_type == base_device::AbacusDevice_t::CpuDevice) { zcopy_(&n, a, &incx, b, &incy); +} } \ No newline at end of file