diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index db19acc3f7c..81097ef9774 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -4,8 +4,9 @@ #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ - #include +#include // for signbit +#include // for std::fabs #include "caffe/util/mkl_alternate.hpp" @@ -112,6 +113,88 @@ void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); template int caffe_hamming_distance(const int n, const Dtype* x, const Dtype* y); +// Returns the sum of the absolute values of the elements of vector x +template +Dtype caffe_cpu_asum(const int n, const Dtype* x); + +template +void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); + +// the branchless, type-safe version from +// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c +template +inline char caffe_sign(Dtype val) { + return (Dtype(0) < val) - (val < Dtype(0)); +} + +// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC +// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. +// Please refer to commit 7e8ef25c7 of the boost-eigen branch. +// Git cherry picking that commit caused a conflict hard to resolve and +// copying that file in convenient for code reviewing. +// So they have to be pasted here temporarily. +#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ + template \ + void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(x); CHECK(y); \ + for (int i = 0; i < n; ++i) { \ + operation; \ + } \ + } + +#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \ + template <> \ + void caffe_cpu_##name(const int n, const float* x, float* y); \ + template <> \ + void caffe_cpu_##name(const int n, const double* x, double* y) + + +#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ +template \ +__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ + int index = threadIdx.x + blockIdx.x * blockDim.x; \ + if (index < n) { \ + operation; \ + } \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const float* x, float* y) { \ + /* NOLINT_NEXT_LINE(whitespace/operators) */ \ + name##_kernel<<>>( \ + n, x, y); \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const double* x, double* y) { \ + /* NOLINT_NEXT_LINE(whitespace/operators) */ \ + name##_kernel<<>>( \ + n, x, y); \ +} + +// output is 1 for the positives, 0 for zero, and -1 for the negatives +DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); + +template +void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); + +// This returns a nonzero value if the input has its sign bit set. +// The name sngbit is meant to avoid conflicts with std::signbit in the macro +using std::signbit; +DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, y[i] = signbit(x[i])); + +template +void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y); + +DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); + +template +void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); + +template +void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + +template +void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 45d43cc9415..ca059a9147c 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -1,6 +1,10 @@ // Copyright 2014 kloudkl@github #include // for uint32_t & uint64_t +#include +#include +#include // for std::fabs +#include // for rand_r #include "gtest/gtest.h" #include "caffe/blob.hpp" @@ -22,8 +26,8 @@ class MathFunctionsTest : public ::testing::Test { virtual void SetUp() { Caffe::set_random_seed(1701); - this->blob_bottom_->Reshape(100, 70, 50, 30); - this->blob_top_->Reshape(100, 70, 50, 30); + this->blob_bottom_->Reshape(11, 17, 19, 23); + this->blob_top_->Reshape(11, 17, 19, 23); // fill the values FillerParameter filler_param; GaussianFiller filler(filler_param); @@ -74,4 +78,118 @@ TYPED_TEST(MathFunctionsTest, TestHammingDistance) { caffe_hamming_distance(n, x, y)); } +TYPED_TEST(MathFunctionsTest, TestAsumCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam cpu_asum = caffe_cpu_asum(n, x); + CHECK_LT((cpu_asum - std_asum) / std_asum, 1e-2); +} + +TYPED_TEST(MathFunctionsTest, TestAsumGPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam gpu_asum; + caffe_gpu_asum(n, this->blob_bottom_->gpu_data(), &gpu_asum); + CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2); +} + +TYPED_TEST(MathFunctionsTest, TestSignCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_sign(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0)); + } +} + +TYPED_TEST(MathFunctionsTest, TestSignGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_sign(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0)); + } +} + +TYPED_TEST(MathFunctionsTest, TestSgnbitCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_sgnbit(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestSgnbitGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_sgnbit(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestFabsCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_fabs(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]); + } +} + +TYPED_TEST(MathFunctionsTest, TestFabsGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_fabs(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]); + } +} + +TYPED_TEST(MathFunctionsTest, TestScaleCPU) { + int n = this->blob_bottom_->count(); + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_cpu_scale(n, alpha, this->blob_bottom_->cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(scaled[i], x[i] * alpha); + } +} + +TYPED_TEST(MathFunctionsTest, TestScaleGPU) { + int n = this->blob_bottom_->count(); + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_gpu_scale(n, alpha, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(scaled[i], x[i] * alpha); + } +} + } // namespace caffe diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 29bdaf6c708..80e420f5689 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -390,4 +390,56 @@ int caffe_hamming_distance(const int n, const double* x, return dist; } +template <> +float caffe_cpu_asum(const int n, const float* x) { + return cblas_sasum(n, x, 1); +} + +template <> +double caffe_cpu_asum(const int n, const double* x) { + return cblas_dasum(n, x, 1); +} + +template <> +void caffe_gpu_asum(const int n, const float* x, float* y) { + CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y)); +} + +template <> +void caffe_gpu_asum(const int n, const double* x, double* y) { + CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); +} + +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sgnbit); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); + +template <> +void caffe_cpu_scale(const int n, const float alpha, const float *x, + float* y) { + cblas_scopy(n, x, 1, y, 1); + cblas_sscal(n, alpha, y, 1); +} + +template <> +void caffe_cpu_scale(const int n, const double alpha, const double *x, + double* y) { + cblas_dcopy(n, x, 1, y, 1); + cblas_dscal(n, alpha, y, 1); +} + +template <> +void caffe_gpu_scale(const int n, const float alpha, const float *x, + float* y) { + CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + +template <> +void caffe_gpu_scale(const int n, const double alpha, const double *x, + double* y) { + CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + } // namespace caffe diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 5491e246c48..85753aa567a 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -1,5 +1,7 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github +#include // CUDA's, not caffe's, for fabs, signbit #include #include #include @@ -33,5 +35,9 @@ void caffe_gpu_mul(const int N, const double* a, N, a, b, y); } +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) + - (x[index] < Dtype(0))); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index])); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index])); } // namespace caffe