From 68f51c8d13085c754bff77b8645414b03369a76d Mon Sep 17 00:00:00 2001
From: Aravindh Mahendran
Date: Sun, 16 Feb 2014 10:43:34 -0500
Subject: [PATCH 1/2] Added tanh activation function layer.

---
 include/caffe/vision_layers.hpp | 17 ++++++
 src/caffe/layer_factory.cpp     |  2 +
 src/caffe/layers/tanh_layer.cu  | 97 +++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 src/caffe/layers/tanh_layer.cu

diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 4db2556de62..1861535dc52 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -44,6 +44,23 @@ class ReLULayer : public NeuronLayer<Dtype> {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 };
 
+template <typename Dtype>
+class TanHLayer : public NeuronLayer<Dtype> {
+ public:
+  explicit TanHLayer(const LayerParameter& param)
+      : NeuronLayer<Dtype>(param) {}
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+};
 
 template <typename Dtype>
 class SigmoidLayer : public NeuronLayer<Dtype> {
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 48d6edf7c86..ff69c91e498 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -47,6 +47,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new PoolingLayer<Dtype>(param);
   } else if (type == "relu") {
     return new ReLULayer<Dtype>(param);
+  } else if (type == "tanh") {
+    return new TanHLayer<Dtype>(param);
   } else if (type == "sigmoid") {
     return new SigmoidLayer<Dtype>(param);
   } else if (type == "softmax") {
diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu
new file mode 100644
index 00000000000..22e0831afb7
--- /dev/null
+++ b/src/caffe/layers/tanh_layer.cu
@@ -0,0 +1,97 @@
+// Copyright 2014 Aravindh Mahendran
+// TanH neuron activation function layer.
+// Adapted from ReLU layer code written by Yangqing Jia
+
+#include <cmath>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  Dtype exp2x;
+  const int count = bottom[0]->count();
+  for (int i = 0; i < count; ++i) {
+    // tanh(x) computed via the identity (e^{2x} - 1) / (e^{2x} + 1)
+    exp2x = exp(2 * bottom_data[i]);
+    top_data[i] = (exp2x - Dtype(1)) / (exp2x + Dtype(1));
+  }
+}
+
+template <typename Dtype>
+Dtype TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+    const int count = (*bottom)[0]->count();
+    Dtype exp2x;
+    Dtype tanhx;
+    for (int i = 0; i < count; ++i) {
+      // d/dx tanh(x) = 1 - tanh^2(x)
+      exp2x = exp(2 * bottom_data[i]);
+      tanhx = (exp2x - Dtype(1)) / (exp2x + Dtype(1));
+      bottom_diff[i] = top_diff[i] * (Dtype(1) - tanhx * tanhx);
+    }
+  }
+  return Dtype(0);
+}
+
+template <typename Dtype>
+__global__ void TanHForward(const int n, const Dtype* in, Dtype* out) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    Dtype exp2x = exp(2 * in[index]);
+    out[index] = (exp2x - Dtype(1)) / (exp2x + Dtype(1));
+  }
+}
+
+template <typename Dtype>
+void TanHLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  const int count = bottom[0]->count();
+  TanHForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+      count, bottom_data, top_data);
+  CUDA_POST_KERNEL_CHECK;
+}
+
+template <typename Dtype>
+__global__ void TanHBackward(const int n, const Dtype* in_diff,
+    const Dtype* in_data, Dtype* out_diff) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    Dtype exp2x = exp(2 * in_data[index]);
+    Dtype tanhx = (exp2x - Dtype(1)) / (exp2x + Dtype(1));
+    out_diff[index] = in_diff[index] * (Dtype(1) - tanhx * tanhx);
+  }
+}
+
+template <typename Dtype>
+Dtype TanHLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+    const int count = (*bottom)[0]->count();
+    TanHBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+        count, top_diff, bottom_data, bottom_diff);
+    CUDA_POST_KERNEL_CHECK;
+  }
+  return Dtype(0);
+}
+
+INSTANTIATE_CLASS(TanHLayer);
+
+}  // namespace caffe

From 9a45a0ae8adee910fa33f9acaf018312944f70f4 Mon Sep 17 00:00:00 2001
From: Aravindh Mahendran
Date: Sun, 16 Feb 2014 11:04:26 -0500
Subject: [PATCH 2/2] Added a test for the tanh layer.
---
 src/caffe/test/test_tanh_layer.cpp | 102 +++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 src/caffe/test/test_tanh_layer.cpp

diff --git a/src/caffe/test/test_tanh_layer.cpp b/src/caffe/test/test_tanh_layer.cpp
new file mode 100644
index 00000000000..a4226a28b22
--- /dev/null
+++ b/src/caffe/test/test_tanh_layer.cpp
@@ -0,0 +1,102 @@
+// Copyright 2014 Aravindh Mahendran
+// Adapted from other test files
+
+#include <cmath>
+#include <cstring>
+#include <cuda_runtime.h>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class TanHLayerTest : public ::testing::Test {
+ protected:
+  TanHLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 10, 1, 1)),
+        blob_top_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~TanHLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(TanHLayerTest, Dtypes);
+
+TYPED_TEST(TanHLayerTest, TestForwardCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  TanHLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Test values against the exp-based tanh identity, up to 1e-4 tolerance
+  for (int i = 0; i < this->blob_bottom_->num(); ++i) {
+    for (int j = 0; j < this->blob_bottom_->channels(); ++j) {
+      for (int k = 0; k < this->blob_bottom_->height(); ++k) {
+        for (int l = 0; l < this->blob_bottom_->width(); ++l) {
+          TypeParam expected =
+              (exp(2 * this->blob_bottom_->data_at(i, j, k, l)) - 1) /
+              (exp(2 * this->blob_bottom_->data_at(i, j, k, l)) + 1);
+          EXPECT_GE(this->blob_top_->data_at(i, j, k, l) + 1e-4, expected);
+          EXPECT_LE(this->blob_top_->data_at(i, j, k, l) - 1e-4, expected);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(TanHLayerTest, TestGradientCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  TanHLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(TanHLayerTest, TestForwardGPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::GPU);
+  TanHLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Test values against the exp-based tanh identity, up to 1e-4 tolerance
+  for (int i = 0; i < this->blob_bottom_->num(); ++i) {
+    for (int j = 0; j < this->blob_bottom_->channels(); ++j) {
+      for (int k = 0; k < this->blob_bottom_->height(); ++k) {
+        for (int l = 0; l < this->blob_bottom_->width(); ++l) {
+          TypeParam expected =
+              (exp(2 * this->blob_bottom_->data_at(i, j, k, l)) - 1) /
+              (exp(2 * this->blob_bottom_->data_at(i, j, k, l)) + 1);
+          EXPECT_GE(this->blob_top_->data_at(i, j, k, l) + 1e-4, expected);
+          EXPECT_LE(this->blob_top_->data_at(i, j, k, l) - 1e-4, expected);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(TanHLayerTest, TestGradientGPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::GPU);
+  TanHLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+}  // namespace caffe
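
For reference, the layer's math rests on two identities: the forward pass computes tanh(x) via (e^(2x) - 1) / (e^(2x) + 1), and the backward pass uses d tanh(x)/dx = 1 - tanh(x)^2. The standalone sketch below (illustrative only, not part of either patch; the input grid and tolerances are arbitrary choices) checks both identities against std::tanh:

// Illustrative sketch (not part of the patch): verify the identities
// used by TanHLayer against std::tanh on a small grid of inputs.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  for (double x = -3.0; x <= 3.0; x += 0.25) {
    double exp2x = std::exp(2 * x);
    // Forward identity: tanh(x) == (e^{2x} - 1) / (e^{2x} + 1)
    double t = (exp2x - 1) / (exp2x + 1);
    assert(std::fabs(t - std::tanh(x)) < 1e-12);
    // Backward identity: d/dx tanh(x) == 1 - tanh(x)^2,
    // checked against a central finite difference.
    double h = 1e-6;
    double num = (std::tanh(x + h) - std::tanh(x - h)) / (2 * h);
    assert(std::fabs((1 - t * t) - num) < 1e-6);
  }
  std::printf("tanh forward/backward identities hold\n");
  return 0;
}

One caveat worth noting: the exp-based form overflows for large inputs (in single precision, exp(2x) becomes inf once x exceeds roughly 44, and the ratio then evaluates to NaN), so calling tanh()/tanhf() directly would be the numerically safer formulation; the Gaussian-filled test inputs here stay well inside the safe range.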