From 910f3128c7947cacfc88ac40828ec7e694cb529a Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 18:41:45 +0800
Subject: [PATCH 1/9] Add and test sum of absolute values math functions for
 CPU and GPU

---
 include/caffe/util/math_functions.hpp  |  7 +++++++
 src/caffe/test/test_math_functions.cpp | 24 ++++++++++++++++++++++++
 src/caffe/util/math_functions.cpp      | 20 ++++++++++++++++++++
 3 files changed, 51 insertions(+)
diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index db19acc3f7c..ab1cee17402 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -112,6 +112,13 @@ void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
 template <typename Dtype>
 int caffe_hamming_distance(const int n, const Dtype* x, const Dtype* y);
 
+// Returns the sum of the absolute values of the elements of vector x
+template <typename Dtype>
+Dtype caffe_cpu_asum(const int n, const Dtype* x);
+
+template <typename Dtype>
+void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
+
 }  // namespace caffe
 
 
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index 45d43cc9415..ba8bfe72559 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -1,6 +1,7 @@
 // Copyright 2014 kloudkl@github
 
 #include <stdint.h>  // for uint32_t & uint64_t
+#include <cmath>  // for std::fabs
 
 #include "gtest/gtest.h"
 #include "caffe/blob.hpp"
@@ -74,4 +75,27 @@ TYPED_TEST(MathFunctionsTest, TestHammingDistance) {
            caffe_hamming_distance<TypeParam>(n, x, y));
 }
 
+TYPED_TEST(MathFunctionsTest, TestAsumCPU){
+  int n = this->blob_bottom_->count();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  TypeParam std_asum = 0;
+  for (int i = 0; i < n; ++i) {
+    std_asum += std::fabs(x[i]);
+  }
+  TypeParam cpu_asum = caffe_cpu_asum<TypeParam>(n, x);
+  CHECK_LT((cpu_asum - std_asum) / std_asum, 1e-2);
+}
+
+TYPED_TEST(MathFunctionsTest, TestAsumGPU){
+  int n = this->blob_bottom_->count();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  TypeParam std_asum = 0;
+  for (int i = 0; i < n; ++i) {
+    std_asum += std::fabs(x[i]);
+  }
+  TypeParam gpu_asum;
+  caffe_gpu_asum<TypeParam>(n, this->blob_bottom_->gpu_data(), &gpu_asum);
+  CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2);
+}
+
 }  // namespace caffe
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index 29bdaf6c708..04a5228e33f 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -390,4 +390,24 @@ int caffe_hamming_distance<double>(const int n, const double* x,
   return dist;
 }
 
+template <>
+float caffe_cpu_asum<float>(const int n, const float* x) {
+  return cblas_sasum(n, x, 1);
+}
+
+template <>
+double caffe_cpu_asum<double>(const int n, const double* x) {
+  return cblas_dasum(n, x, 1);
+}
+
+template <>
+void caffe_gpu_asum<float>(const int n, const float* x, float* y) {
+  CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y));
+}
+
+template <>
+void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
+  CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
+}
+
 }  // namespace caffe

From 348a338e7f205b3e8bf66d1782d9ecf9a5bb9268 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 19:16:44 +0800
Subject: [PATCH 2/9] Add and test element wise sign math functions for CPU and
 GPU

---
 include/caffe/util/math_functions.hpp  | 17 +++++++++++++++
 src/caffe/test/test_math_functions.cpp | 29 ++++++++++++++++++++++----
 src/caffe/util/math_functions.cu       | 20 ++++++++++++++++++
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index ab1cee17402..fd9de876321 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -119,6 +119,23 @@ Dtype caffe_cpu_asum(const int n, const Dtype* x);
 template <typename Dtype>
 void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
 
+// the branchless, type-safe version from
+// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c
+template<typename Dtype>
+inline char caffe_sign(Dtype val) {
+  return (Dtype(0) < val) - (val < Dtype(0));
+}
+
+template<typename Dtype>
+void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = caffe_sign<Dtype>(x[i]);
+  }
+}
+
+template<typename Dtype>
+void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
+
 }  // namespace caffe
 
 
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index ba8bfe72559..09b4aa67719 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -1,7 +1,7 @@
 // Copyright 2014 kloudkl@github
 
-#include <stdint.h>  // for uint32_t & uint64_t
-#include <cmath>  // for std::fabs
+#include <stdint.h> // for uint32_t & uint64_t
+#include <cmath> // for std::fabs
 
 #include "gtest/gtest.h"
 #include "caffe/blob.hpp"
@@ -67,7 +67,7 @@ REF_HAMMING_DIST(double, uint64_t);
 typedef ::testing::Types<float, double> Dtypes;
 TYPED_TEST_CASE(MathFunctionsTest, Dtypes);
 
-TYPED_TEST(MathFunctionsTest, TestHammingDistance) {
+TYPED_TEST(MathFunctionsTest, TestHammingDistance){
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   const TypeParam* y = this->blob_top_->cpu_data();
@@ -98,4 +98,25 @@ TYPED_TEST(MathFunctionsTest, TestAsumGPU){
   CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2);
 }
 
-}  // namespace caffe
+TYPED_TEST(MathFunctionsTest, TestSignCPU){
+  int n = this->blob_bottom_->count();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  caffe_cpu_sign<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
+  const TypeParam* signs = this->blob_bottom_->cpu_diff();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0));
+  }
+}
+
+TYPED_TEST(MathFunctionsTest, TestSignGPU){
+  int n = this->blob_bottom_->count();
+  caffe_gpu_sign<TypeParam>(n, this->blob_bottom_->gpu_data(),
+                            this->blob_bottom_->mutable_gpu_diff());
+  const TypeParam* signs = this->blob_bottom_->cpu_diff();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0));
+  }
+}
+
+}
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 5491e246c48..5aff39fddd4 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -1,4 +1,5 @@
 // Copyright 2013 Yangqing Jia
+// Copyright 2014 kloudkl@github
 
 #include <cmath>
 #include <cstdlib>
@@ -33,5 +34,24 @@ void caffe_gpu_mul<double>(const int N, const double* a,
       N, a, b, y);
 }
 
+template<typename Dtype>
+__global__ void sign_kernel(const int n, const Dtype* x, Dtype* y) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0));
+  }
+}
+
+template <>
+void caffe_gpu_sign<float>(const int n, const float* x, float* y) {
+  sign_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
+      n, x, y);
+}
+
+template <>
+void caffe_gpu_sign<double>(const int n, const double* x, double* y) {
+  sign_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
+      n, x, y);
+}
 
 }  // namespace caffe

From f634899f44fda89692e12b8114889838f50a73d8 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 19:21:18 +0800
Subject: [PATCH 3/9] Instantiate caffe_cpu_sign for float and double

---
 src/caffe/util/math_functions.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index 04a5228e33f..bf244882c84 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -410,4 +410,10 @@ void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
   CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
 }
 
+template <>
+void caffe_cpu_sign<float>(const int n, const float* x, float* y);
+
+template <>
+void caffe_cpu_sign<double>(const int n, const double* x, double* y);
+
 }  // namespace caffe

From ccae3fa5879cf8c79a0ac9ce351f9868c2f74516 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 19:33:10 +0800
Subject: [PATCH 4/9] Add and test element wise abs math functions for CPU and
 GPU

---
 include/caffe/util/math_functions.hpp  | 12 +++++++++++-
 src/caffe/test/test_math_functions.cpp | 21 +++++++++++++++++++++
 src/caffe/util/math_functions.cpp      |  6 ++++++
 src/caffe/util/math_functions.cu       | 21 +++++++++++++++++++++
 4 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index fd9de876321..4878cf22226 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -4,7 +4,7 @@
 #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
 #define CAFFE_UTIL_MATH_FUNCTIONS_H_
 
-
+#include <cmath> // for std::fabs
 #include <cublas_v2.h>
 
 #include "caffe/util/mkl_alternate.hpp"
@@ -136,6 +136,16 @@ void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) {
 template<typename Dtype>
 void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
 
+template <typename Dtype>
+void caffe_cpu_fabs(const int n, const Dtype* x, Dtype* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = std::fabs(x[i]);
+  }
+}
+
+template <typename Dtype>
+void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
+
 }  // namespace caffe
 
 
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index 09b4aa67719..0f46273d257 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -119,4 +119,25 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){
   }
 }
 
+TYPED_TEST(MathFunctionsTest, TestFabsCPU){
+  int n = this->blob_bottom_->count();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  caffe_cpu_fabs<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
+  const TypeParam* abs_val = this->blob_bottom_->cpu_diff();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]);
+  }
+}
+
+TYPED_TEST(MathFunctionsTest, TestFabsGPU){
+  int n = this->blob_bottom_->count();
+  caffe_gpu_fabs<TypeParam>(n, this->blob_bottom_->gpu_data(),
+                            this->blob_bottom_->mutable_gpu_diff());
+  const TypeParam* abs_val = this->blob_bottom_->cpu_diff();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]);
+  }
+}
+
 }
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index bf244882c84..a24f2878b3e 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -416,4 +416,10 @@ void caffe_cpu_sign<float>(const int n, const float* x, float* y);
 template <>
 void caffe_cpu_sign<double>(const int n, const double* x, double* y);
 
+template <>
+void caffe_cpu_fabs<float>(const int n, const float* x, float* y);
+
+template <>
+void caffe_cpu_fabs<double>(const int n, const double* x, double* y);
+
 }  // namespace caffe
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 5aff39fddd4..72cbb00c357 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -4,6 +4,7 @@
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
+#include <math_functions.h> // CUDA's, not caffe's, for fabs
 
 #include "caffe/common.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -54,4 +55,24 @@ void caffe_gpu_sign<double>(const int n, const double* x, double* y) {
       n, x, y);
 }
 
+template<typename Dtype>
+__global__ void fabs_kernel(const int n, const Dtype* x, Dtype* y) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    y[index] = fabs(x[index]);
+  }
+}
+
+template <>
+void caffe_gpu_fabs<float>(const int n, const float* x, float* y) {
+  fabs_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
+      n, x, y);
+}
+
+template <>
+void caffe_gpu_fabs<double>(const int n, const double* x, double* y) {
+  fabs_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
+      n, x, y);
+}
+
 }  // namespace caffe

From b458b41d6844d9ffcbc318f8ffb97458997cb5fc Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 20:06:05 +0800
Subject: [PATCH 5/9] Use macro to simplify element wise cpu math functions

---
 include/caffe/util/math_functions.hpp | 33 ++++++++++++++++++---------
 src/caffe/util/math_functions.cpp     | 13 ++---------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index 4878cf22226..ec297bf08c9 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -126,22 +126,33 @@ inline char caffe_sign(Dtype val) {
   return (Dtype(0) < val) - (val < Dtype(0));
 }
 
-template<typename Dtype>
-void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = caffe_sign<Dtype>(x[i]);
+// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC
+//   in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp.
+// Please refer to commit 7e8ef25c7 of the boost-eigen branch.
+// Git cherry picking that commit caused a conflict hard to resolve and
+//   copying that file is inconvenient for code reviewing.
+// So they have to be pasted here temporarily.
+#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \
+  template<typename Dtype> \
+  void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \
+    CHECK_GT(n, 0); CHECK(x); CHECK(y); \
+    for (int i = 0; i < n; ++i) { \
+      operation; \
+    } \
   }
-}
+
+#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \
+  template <> \
+  void caffe_cpu_##name<float>(const int n, const float* x, float* y); \
+  template <> \
+  void caffe_cpu_##name<double>(const int n, const double* x, double* y)
+
+DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]));
 
 template<typename Dtype>
 void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
 
-template <typename Dtype>
-void caffe_cpu_fabs(const int n, const Dtype* x, Dtype* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = std::fabs(x[i]);
-  }
-}
+DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]));
 
 template <typename Dtype>
 void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index a24f2878b3e..47be94a8ba0 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -410,16 +410,7 @@ void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
   CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
 }
 
-template <>
-void caffe_cpu_sign<float>(const int n, const float* x, float* y);
-
-template <>
-void caffe_cpu_sign<double>(const int n, const double* x, double* y);
-
-template <>
-void caffe_cpu_fabs<float>(const int n, const float* x, float* y);
-
-template <>
-void caffe_cpu_fabs<double>(const int n, const double* x, double* y);
+INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign);
+INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs);
 
 }  // namespace caffe

From b1f6eb0b91c504cba5c77c08ec0f0c1f0508d2ff Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 25 Feb 2014 20:26:55 +0800
Subject: [PATCH 6/9] Add and test non-in-place scale math functions for CPU
 and GPU

---
 include/caffe/util/math_functions.hpp  |  6 ++++++
 src/caffe/test/test_math_functions.cpp | 26 ++++++++++++++++++++++++
 src/caffe/util/math_functions.cpp      | 28 ++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index ec297bf08c9..5d4a8e93f59 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -157,6 +157,12 @@ DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]));
 template <typename Dtype>
 void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
 
+template <typename Dtype>
+void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
+
+template <typename Dtype>
+void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
+
 }  // namespace caffe
 
 
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index 0f46273d257..00f28badb86 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -140,4 +140,30 @@ TYPED_TEST(MathFunctionsTest, TestFabsGPU){
   }
 }
 
+TYPED_TEST(MathFunctionsTest, TestScaleCPU){
+  int n = this->blob_bottom_->count();
+  TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() %
+                                               this->blob_bottom_->count()];
+  caffe_cpu_scale<TypeParam>(n, alpha, this->blob_bottom_->cpu_data(),
+                             this->blob_bottom_->mutable_cpu_diff());
+  const TypeParam* scaled = this->blob_bottom_->cpu_diff();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(scaled[i], x[i] * alpha);
+  }
+}
+
+TYPED_TEST(MathFunctionsTest, TestScaleGPU){
+  int n = this->blob_bottom_->count();
+  TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() %
+                                               this->blob_bottom_->count()];
+  caffe_gpu_scale<TypeParam>(n, alpha, this->blob_bottom_->gpu_data(),
+                             this->blob_bottom_->mutable_gpu_diff());
+  const TypeParam* scaled = this->blob_bottom_->cpu_diff();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(scaled[i], x[i] * alpha);
+  }
+}
+
 }
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index 47be94a8ba0..ef347a1f65e 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -413,4 +413,32 @@ void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign);
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs);
 
+template <>
+void caffe_cpu_scale<float>(const int n, const float alpha, const float *x,
+                            float* y) {
+  cblas_scopy(n, x, 1, y, 1);
+  cblas_sscal(n, alpha, y, 1);
+}
+
+template <>
+void caffe_cpu_scale<double>(const int n, const double alpha, const double *x,
+                             double* y) {
+  cblas_dcopy(n, x, 1, y, 1);
+  cblas_dscal(n, alpha, y, 1);
+}
+
+template <>
+void caffe_gpu_scale<float>(const int n, const float alpha, const float *x,
+                            float* y) {
+  CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1));
+  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1));
+}
+
+template <>
+void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
+                             double* y) {
+  CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1));
+  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1));
+}
+
 }  // namespace caffe

From dc552e058ff35c002ec76d7a15c564bb447edffa Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Wed, 26 Feb 2014 11:23:20 +0800
Subject: [PATCH 7/9] Add signbit math func, simplify GPU defs & instantiations
 with a macro

---
 include/caffe/util/math_functions.hpp  | 28 ++++++++++++++++
 src/caffe/test/test_math_functions.cpp | 21 ++++++++++++
 src/caffe/util/math_functions.cpp      |  1 +
 src/caffe/util/math_functions.cu       | 44 +++-----------------------
 4 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index 5d4a8e93f59..268cb2bd0df 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -5,6 +5,7 @@
 #define CAFFE_UTIL_MATH_FUNCTIONS_H_
 
 #include <cmath> // for std::fabs
+#include <math.h> // for signbit
 #include <cublas_v2.h>
 
 #include "caffe/util/mkl_alternate.hpp"
@@ -147,11 +148,38 @@ inline char caffe_sign(Dtype val) {
   template <> \
   void caffe_cpu_##name<double>(const int n, const double* x, double* y)
 
+
+#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \
+template<typename Dtype> \
+__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \
+  int index = threadIdx.x + blockIdx.x * blockDim.x; \
+  if (index < n) { \
+    operation; \
+  } \
+} \
+template <> \
+void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \
+  name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
+      n, x, y); \
+} \
+template <> \
+void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \
+  name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
+      n, x, y); \
+}
+
+// output is 1 for the positives, 0 for zero, and -1 for the negatives
 DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]));
 
 template<typename Dtype>
 void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
 
+// returns a nonzero value is the input has its sign bit set.
+DEFINE_CAFFE_CPU_UNARY_FUNC(signbit, y[i] = std::signbit(x[i]));
+
+template<typename Dtype>
+void caffe_gpu_signbit(const int n, const Dtype* x, Dtype* y);
+
 DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]));
 
 template <typename Dtype>
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index 00f28badb86..d314d73b45c 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -119,6 +119,27 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){
   }
 }
 
+TYPED_TEST(MathFunctionsTest, TestSignbitCPU){
+  int n = this->blob_bottom_->count();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  caffe_cpu_signbit<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
+  const TypeParam* signbits = this->blob_bottom_->cpu_diff();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0);
+  }
+}
+
+TYPED_TEST(MathFunctionsTest, TestSignbitGPU){
+  int n = this->blob_bottom_->count();
+  caffe_gpu_signbit<TypeParam>(n, this->blob_bottom_->gpu_data(),
+                            this->blob_bottom_->mutable_gpu_diff());
+  const TypeParam* signbits = this->blob_bottom_->cpu_diff();
+  const TypeParam* x = this->blob_bottom_->cpu_data();
+  for (int i = 0; i < n; ++i) {
+    CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0);
+  }
+}
+
 TYPED_TEST(MathFunctionsTest, TestFabsCPU){
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index ef347a1f65e..ad83a998b09 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -411,6 +411,7 @@ void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
 }
 
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign);
+INSTANTIATE_CAFFE_CPU_UNARY_FUNC(signbit);
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs);
 
 template <>
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 72cbb00c357..e3eaacc10db 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -4,7 +4,7 @@
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
-#include <math_functions.h> // CUDA's, not caffe's, for fabs
+#include <math_functions.h> // CUDA's, not caffe's, for fabs, signbit
 
 #include "caffe/common.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -35,44 +35,8 @@ void caffe_gpu_mul<double>(const int N, const double* a,
       N, a, b, y);
 }
 
-template<typename Dtype>
-__global__ void sign_kernel(const int n, const Dtype* x, Dtype* y) {
-  int index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < n) {
-    y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0));
-  }
-}
-
-template <>
-void caffe_gpu_sign<float>(const int n, const float* x, float* y) {
-  sign_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
-      n, x, y);
-}
-
-template <>
-void caffe_gpu_sign<double>(const int n, const double* x, double* y) {
-  sign_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
-      n, x, y);
-}
-
-template<typename Dtype>
-__global__ void fabs_kernel(const int n, const Dtype* x, Dtype* y) {
-  int index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < n) {
-    y[index] = fabs(x[index]);
-  }
-}
-
-template <>
-void caffe_gpu_fabs<float>(const int n, const float* x, float* y) {
-  fabs_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
-      n, x, y);
-}
-
-template <>
-void caffe_gpu_fabs<double>(const int n, const double* x, double* y) {
-  fabs_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
-      n, x, y);
-}
+DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)));
+DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(signbit, y[index] = signbit(x[index]));
+DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index]));
 
 }  // namespace caffe

From a288d9538a957e25f96b74f824535632e6bf4f03 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 11 Mar 2014 17:05:27 +0800
Subject: [PATCH 8/9] Rename signbit in macros to sgnbit to avoid conflicts
 with std::signbit

---
 include/caffe/util/math_functions.hpp  | 8 +++++---
 src/caffe/test/test_math_functions.cpp | 8 ++++----
 src/caffe/util/math_functions.cpp      | 2 +-
 src/caffe/util/math_functions.cu       | 2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index 268cb2bd0df..b18a058de58 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -174,11 +174,13 @@ DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]));
 template<typename Dtype>
 void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
 
-// returns a nonzero value is the input has its sign bit set.
-DEFINE_CAFFE_CPU_UNARY_FUNC(signbit, y[i] = std::signbit(x[i]));
+// This returns a nonzero value if the input has its sign bit set.
+// The name sgnbit is meant to avoid conflicts with std::signbit in the macro
+using std::signbit;
+DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, y[i] = signbit(x[i]));
 
 template<typename Dtype>
-void caffe_gpu_signbit(const int n, const Dtype* x, Dtype* y);
+void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);
 
 DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]));
 
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index d314d73b45c..9a68d8731cd 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -119,19 +119,19 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestSignbitCPU){
+TYPED_TEST(MathFunctionsTest, TestSgnbitCPU){
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
-  caffe_cpu_signbit<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
+  caffe_cpu_sgnbit<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
   const TypeParam* signbits = this->blob_bottom_->cpu_diff();
   for (int i = 0; i < n; ++i) {
     CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0);
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestSignbitGPU){
+TYPED_TEST(MathFunctionsTest, TestSgnbitGPU){
   int n = this->blob_bottom_->count();
-  caffe_gpu_signbit<TypeParam>(n, this->blob_bottom_->gpu_data(),
+  caffe_gpu_sgnbit<TypeParam>(n, this->blob_bottom_->gpu_data(),
                             this->blob_bottom_->mutable_gpu_diff());
   const TypeParam* signbits = this->blob_bottom_->cpu_diff();
   const TypeParam* x = this->blob_bottom_->cpu_data();
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index ad83a998b09..80e420f5689 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -411,7 +411,7 @@ void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
 }
 
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign);
-INSTANTIATE_CAFFE_CPU_UNARY_FUNC(signbit);
+INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sgnbit);
 INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs);
 
 template <>
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index e3eaacc10db..2cf1cfcbe70 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -36,7 +36,7 @@ void caffe_gpu_mul<double>(const int N, const double* a,
 }
 
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)));
-DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(signbit, y[index] = signbit(x[index]));
+DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index]));
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index]));
 
 }  // namespace caffe

From 4d53804846ddae85a26ce080fc545a0f99479d66 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 18 Mar 2014 15:22:14 +0800
Subject: [PATCH 9/9] Fixed CPPLint errors related to math functions

---
 include/caffe/util/math_functions.hpp  |  6 +++--
 src/caffe/test/test_math_functions.cpp | 37 +++++++++++++++-----------
 src/caffe/util/math_functions.cu       |  5 ++--
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index b18a058de58..81097ef9774 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -4,9 +4,9 @@
 #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
 #define CAFFE_UTIL_MATH_FUNCTIONS_H_
 
-#include <cmath> // for std::fabs
-#include <math.h> // for signbit
 #include <cublas_v2.h>
+#include <math.h>  // for signbit
+#include <cmath>  // for std::fabs
 
 #include "caffe/util/mkl_alternate.hpp"
 
@@ -159,11 +159,13 @@ __global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \
 } \
 template <> \
 void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \
+  /* NOLINT_NEXT_LINE(whitespace/operators) */ \
   name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
       n, x, y); \
 } \
 template <> \
 void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \
+  /* NOLINT_NEXT_LINE(whitespace/operators) */ \
   name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
       n, x, y); \
 }
diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp
index 9a68d8731cd..ca059a9147c 100644
--- a/src/caffe/test/test_math_functions.cpp
+++ b/src/caffe/test/test_math_functions.cpp
@@ -1,7 +1,10 @@
 // Copyright 2014 kloudkl@github
 
-#include <stdint.h> // for uint32_t & uint64_t
-#include <cmath> // for std::fabs
+#include <stdint.h>  // for uint32_t & uint64_t
+#include <time.h>
+#include <climits>
+#include <cmath>  // for std::fabs
+#include <cstdlib>  // for rand_r
 
 #include "gtest/gtest.h"
 #include "caffe/blob.hpp"
@@ -23,8 +26,8 @@ class MathFunctionsTest : public ::testing::Test {
 
   virtual void SetUp() {
     Caffe::set_random_seed(1701);
-    this->blob_bottom_->Reshape(100, 70, 50, 30);
-    this->blob_top_->Reshape(100, 70, 50, 30);
+    this->blob_bottom_->Reshape(11, 17, 19, 23);
+    this->blob_top_->Reshape(11, 17, 19, 23);
     // fill the values
     FillerParameter filler_param;
     GaussianFiller<Dtype> filler(filler_param);
@@ -67,7 +70,7 @@ REF_HAMMING_DIST(double, uint64_t);
 typedef ::testing::Types<float, double> Dtypes;
 TYPED_TEST_CASE(MathFunctionsTest, Dtypes);
 
-TYPED_TEST(MathFunctionsTest, TestHammingDistance){
+TYPED_TEST(MathFunctionsTest, TestHammingDistance) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   const TypeParam* y = this->blob_top_->cpu_data();
@@ -75,7 +78,7 @@ TYPED_TEST(MathFunctionsTest, TestHammingDistance){
            caffe_hamming_distance<TypeParam>(n, x, y));
 }
 
-TYPED_TEST(MathFunctionsTest, TestAsumCPU){
+TYPED_TEST(MathFunctionsTest, TestAsumCPU) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   TypeParam std_asum = 0;
@@ -86,7 +89,7 @@ TYPED_TEST(MathFunctionsTest, TestAsumCPU){
   CHECK_LT((cpu_asum - std_asum) / std_asum, 1e-2);
 }
 
-TYPED_TEST(MathFunctionsTest, TestAsumGPU){
+TYPED_TEST(MathFunctionsTest, TestAsumGPU) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   TypeParam std_asum = 0;
@@ -98,7 +101,7 @@ TYPED_TEST(MathFunctionsTest, TestAsumGPU){
   CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2);
 }
 
-TYPED_TEST(MathFunctionsTest, TestSignCPU){
+TYPED_TEST(MathFunctionsTest, TestSignCPU) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   caffe_cpu_sign<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
@@ -108,7 +111,7 @@ TYPED_TEST(MathFunctionsTest, TestSignCPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestSignGPU){
+TYPED_TEST(MathFunctionsTest, TestSignGPU) {
   int n = this->blob_bottom_->count();
   caffe_gpu_sign<TypeParam>(n, this->blob_bottom_->gpu_data(),
                             this->blob_bottom_->mutable_gpu_diff());
@@ -119,7 +122,7 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestSgnbitCPU){
+TYPED_TEST(MathFunctionsTest, TestSgnbitCPU) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   caffe_cpu_sgnbit<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
@@ -129,7 +132,7 @@ TYPED_TEST(MathFunctionsTest, TestSgnbitCPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestSgnbitGPU){
+TYPED_TEST(MathFunctionsTest, TestSgnbitGPU) {
   int n = this->blob_bottom_->count();
   caffe_gpu_sgnbit<TypeParam>(n, this->blob_bottom_->gpu_data(),
                             this->blob_bottom_->mutable_gpu_diff());
@@ -140,7 +143,7 @@ TYPED_TEST(MathFunctionsTest, TestSgnbitGPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestFabsCPU){
+TYPED_TEST(MathFunctionsTest, TestFabsCPU) {
   int n = this->blob_bottom_->count();
   const TypeParam* x = this->blob_bottom_->cpu_data();
   caffe_cpu_fabs<TypeParam>(n, x, this->blob_bottom_->mutable_cpu_diff());
@@ -150,7 +153,7 @@ TYPED_TEST(MathFunctionsTest, TestFabsCPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestFabsGPU){
+TYPED_TEST(MathFunctionsTest, TestFabsGPU) {
   int n = this->blob_bottom_->count();
   caffe_gpu_fabs<TypeParam>(n, this->blob_bottom_->gpu_data(),
                             this->blob_bottom_->mutable_gpu_diff());
@@ -161,8 +164,9 @@ TYPED_TEST(MathFunctionsTest, TestFabsGPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestScaleCPU){
+TYPED_TEST(MathFunctionsTest, TestScaleCPU) {
   int n = this->blob_bottom_->count();
+  // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
   TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() %
                                                this->blob_bottom_->count()];
   caffe_cpu_scale<TypeParam>(n, alpha, this->blob_bottom_->cpu_data(),
@@ -174,8 +178,9 @@ TYPED_TEST(MathFunctionsTest, TestScaleCPU){
   }
 }
 
-TYPED_TEST(MathFunctionsTest, TestScaleGPU){
+TYPED_TEST(MathFunctionsTest, TestScaleGPU) {
   int n = this->blob_bottom_->count();
+  // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
   TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() %
                                                this->blob_bottom_->count()];
   caffe_gpu_scale<TypeParam>(n, alpha, this->blob_bottom_->gpu_data(),
@@ -187,4 +192,4 @@ TYPED_TEST(MathFunctionsTest, TestScaleGPU){
   }
 }
 
-}
+}  // namespace caffe
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 2cf1cfcbe70..85753aa567a 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -1,10 +1,10 @@
 // Copyright 2013 Yangqing Jia
 // Copyright 2014 kloudkl@github
 
+#include <math_functions.h>  // CUDA's, not caffe's, for fabs, signbit
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
-#include <math_functions.h> // CUDA's, not caffe's, for fabs, signbit
 
 #include "caffe/common.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -35,7 +35,8 @@ void caffe_gpu_mul<double>(const int N, const double* a,
       N, a, b, y);
 }
 
-DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)));
+DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index])
+                                      - (x[index] < Dtype(0)));
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index]));
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index]));