Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions include/caffe/layers/scalar_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Fix: the include guard was CAFFE_INNER_PRODUCT_LAYER_HPP_, copy-pasted from
// inner_product_layer.hpp; a TU including both headers would silently skip one.
#ifndef CAFFE_SCALAR_LAYER_HPP_
#define CAFFE_SCALAR_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Computes a product of two input Blobs, with the shape of the
 *        latter Blob "broadcast" to match the shape of the former.
 *        Equivalent to tiling the latter Blob, then computing the elementwise
 *        product.
 *
 * The second input may be omitted, in which case it's learned as a parameter
 * of the layer.
 */
template <typename Dtype>
class ScalarLayer: public Layer<Dtype> {
 public:
  explicit ScalarLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Scalar"; }
  // The second bottom (the scalar) is optional; when omitted, the scalar is
  // a learned parameter of the layer (see LayerSetUp).
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  /**
   * In the below shape specifications, @f$ i @f$ denotes the value of the
   * `axis` field given by `this->layer_param_.scalar_param().axis()`, after
   * canonicalization (i.e., conversion from negative to positive index,
   * if applicable).
   *
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the first factor @f$ x @f$
   *   -# @f$ (d_i \times ... \times d_j) @f$
   *      the second factor @f$ y @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the product @f$ z = x y @f$ computed after "broadcasting" y.
   *      Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
   *      then computing the elementwise product.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Vector of ones used to reduce gradients over outer_dim_/inner_dim_ via
  // dot products and gemv calls in Backward; sized max(outer_dim_, inner_dim_).
  Blob<Dtype> sum_multiplier_;
  // Scratch space for the intermediate (outer_dim_ * scalar_dim_) partial
  // sums computed in Backward.
  Blob<Dtype> sum_result_;
  // Copy of the input saved by Forward when computing in place, so Backward
  // can still read the original bottom data.
  Blob<Dtype> temp_;
  // Canonicalized first axis of bottom[0] that the scalar blob applies to.
  int axis_;
  // bottom[0] is treated as (outer_dim_ x scalar_dim_ x inner_dim_), with the
  // scalar broadcast along the middle factor (set up in Reshape).
  int outer_dim_, scalar_dim_, inner_dim_;
};


}  // namespace caffe

#endif  // CAFFE_SCALAR_LAYER_HPP_
187 changes: 187 additions & 0 deletions src/caffe/layers/scalar_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#include <algorithm>
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/scalar_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ScalarLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (bottom.size() == 1 && this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else if (bottom.size() == 1) {
    // No scalar bottom was supplied, so the scalar is a learned parameter of
    // this layer; create and fill it here.
    const ScalarParameter& param = this->layer_param_.scalar_param();
    axis_ = bottom[0]->CanonicalAxisIndex(param.axis());
    const int num_axes = param.num_axes();
    CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
                           << "or -1 to extend to the end of bottom[0]";
    if (num_axes >= 0) {
      CHECK_GE(bottom[0]->num_axes(), axis_ + num_axes)
          << "scalar blob's shape extends past bottom[0]'s shape when applied "
          << "starting with bottom[0] axis = " << axis_;
    }
    this->blobs_.resize(1);
    // The parameter's shape is the slice of bottom[0]'s shape covering axes
    // [axis_, axis_ + num_axes), or through the last axis if num_axes == -1.
    const vector<int>& bottom_shape = bottom[0]->shape();
    const int end_axis =
        (num_axes == -1) ? bottom[0]->num_axes() : axis_ + num_axes;
    const vector<int> scalar_shape(bottom_shape.begin() + axis_,
                                   bottom_shape.begin() + end_axis);
    this->blobs_[0].reset(new Blob<Dtype>(scalar_shape));
    FillerParameter filler_param(param.filler());
    if (!param.has_filler()) {
      // With no explicit filler, initialize to all ones so the layer starts
      // out as the identity operation.
      filler_param.set_type("constant");
      filler_param.set_value(1);
    }
    shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(filler_param));
    filler->Fill(this->blobs_[0].get());
  }
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ScalarParameter& param = this->layer_param_.scalar_param();
  // The scalar is either the (optional) second bottom or the learned
  // parameter blob created in LayerSetUp.
  Blob<Dtype>* scalar = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  // Always set axis_ == 0 in special case where scalar is an actual scalar
  // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the
  // actual setting can be safely ignored; and computation is most efficient
  // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to
  // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally
  // performant.)
  axis_ = (scalar->num_axes() == 0) ?
      0 : bottom[0]->CanonicalAxisIndex(param.axis());
  CHECK_GE(bottom[0]->num_axes(), axis_ + scalar->num_axes())
      << "scalar blob's shape extends past bottom[0]'s shape when applied "
      << "starting with bottom[0] axis = " << axis_;
  // Every axis of the scalar must exactly match the corresponding axis of
  // bottom[0], starting at axis_.
  for (int i = 0; i < scalar->num_axes(); ++i) {
    CHECK_EQ(bottom[0]->shape(axis_ + i), scalar->shape(i))
        << "dimension mismatch between bottom[0]->shape(" << axis_ + i
        << ") and scalar->shape(" << i << ")";
  }
  // Factor bottom[0]'s shape as (outer_dim_ x scalar_dim_ x inner_dim_); the
  // scalar is broadcast along the middle factor.
  outer_dim_ = bottom[0]->count(0, axis_);
  scalar_dim_ = scalar->count();
  inner_dim_ = bottom[0]->count(axis_ + scalar->num_axes());
  if (bottom[0] == top[0]) {  // in-place computation
    // temp_ will hold a copy of the input (saved by Forward) so Backward can
    // still read the original bottom data.
    temp_.ReshapeLike(*bottom[0]);
  } else {
    top[0]->ReshapeLike(*bottom[0]);
  }
  // Scratch space for Backward's partial sums, plus a ones-vector multiplier
  // large enough for reductions over either outer_dim_ or inner_dim_.
  sum_result_.Reshape(vector<int>(1, outer_dim_ * scalar_dim_));
  const int sum_mult_size = std::max(outer_dim_, inner_dim_);
  sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
  // Only (re)fill with ones when needed; if the last element is already 1 the
  // buffer is assumed to have been filled by a previous Reshape.
  if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) {
    caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data());
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  if (bottom[0] == top[0]) {
    // In-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
               temp_.mutable_cpu_data());
  }
  // Use the second bottom as the scalar when given; otherwise the learned one.
  const Dtype* scalar_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  // Scale each inner_dim_-sized slab by its broadcast scalar coefficient.
  for (int outer = 0; outer < outer_dim_; ++outer) {
    for (int s = 0; s < scalar_dim_; ++s) {
      const int offset = (outer * scalar_dim_ + s) * inner_dim_;
      caffe_cpu_scale(inner_dim_, scalar_data[s],
                      bottom_data + offset, top_data + offset);
    }
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // scalar_param: the scalar is this layer's learned parameter (one bottom)
  // rather than a second input blob.
  const bool scalar_param = (bottom.size() == 1);
  Blob<Dtype>* scalar = scalar_param ? this->blobs_[0].get() : bottom[1];
  if ((!scalar_param && propagate_down[1]) ||
      (scalar_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    // For in-place computation, Forward saved the original input in temp_.
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scalar->count());
    Dtype* product = (is_eltwise ? scalar->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    // product[i] = top_diff[i] * bottom_data[i]; the scalar gradient is this
    // product reduced over inner_dim_ and outer_dim_.
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        // Nothing to reduce over the inner dimension.
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        // outer_dim_ * scalar_dim_ == 1 (so outer_dim_ == 1 and the final
        // outer reduction below is skipped): one dot product with the ones
        // vector finishes the gradient. Note: parameter gradients accumulate
        // (+=) across calls; bottom gradients are overwritten.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scalar_diff += result;
        } else {
          *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        // Reduce over inner_dim_ with a matrix-vector product against the
        // ones vector; if there is no outer dimension, write straight into
        // the scalar diff, otherwise into scratch for the second reduction.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scalar->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        // Second reduction: collapse the outer dimension.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          if (scalar_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scalar_diff += result;
          } else {
            *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          // beta = 1 when the scalar is a parameter, to accumulate into the
          // existing diff; beta = 0 otherwise, to overwrite it.
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scalar_param),
                         scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    // Gradient w.r.t. bottom[0] is just the top diff scaled by the broadcast
    // scalar -- same computation as Forward, applied to the diffs.
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = scalar->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(ScalarLayer);
#endif

INSTANTIATE_CLASS(ScalarLayer);
REGISTER_LAYER_CLASS(Scalar);

} // namespace caffe
113 changes: 113 additions & 0 deletions src/caffe/layers/scalar_layer.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#include <cfloat>
#include <vector>

#include "caffe/layers/scalar_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Broadcast multiply kernel: `in` is viewed as a flattened
// (outer_dim x scalar_dim x inner_dim) volume, and each element is multiplied
// by the scalar coefficient selected by its middle coordinate.
template <typename Dtype>
__global__ void ScalarForward(const int n, const Dtype* in,
    const Dtype* scalar, const int scalar_dim, const int inner_dim,
    Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    // Middle coordinate of `index` in the (outer, scalar, inner) factoring.
    const int scalar_index = (index / inner_dim) % scalar_dim;
    out[index] = in[index] * scalar[scalar_index];
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  if (bottom[0] == top[0]) {
    // in-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(),
               temp_.mutable_gpu_data());
  }
  // Use the second bottom as the scalar when given; otherwise the learned one.
  const Dtype* scalar_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  // One thread per output element; the kernel does the broadcast multiply.
  ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, scalar_data, scalar_dim_, inner_dim_, top_data);
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // scalar_param: the scalar is this layer's learned parameter (one bottom)
  // rather than a second input blob.
  const bool scalar_param = (bottom.size() == 1);
  Blob<Dtype>* scalar = scalar_param ? this->blobs_[0].get() : bottom[1];
  if ((!scalar_param && propagate_down[1]) ||
      (scalar_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->gpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    // For in-place computation, Forward saved the original input in temp_.
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scalar->count());
    Dtype* product = (is_eltwise ? scalar->mutable_gpu_diff() :
        (in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff()));
    // product[i] = top_diff[i] * bottom_data[i]; the scalar gradient is this
    // product reduced over inner_dim_ and outer_dim_.
    caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        // Nothing to reduce over the inner dimension.
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        // outer_dim_ * scalar_dim_ == 1 (so outer_dim_ == 1 and the final
        // outer reduction below is skipped): one dot product finishes the
        // gradient. caffe_gpu_dot's output argument here is a host-side
        // pointer, hence the cpu (not gpu) diff -- NOTE(review): confirm
        // against caffe_gpu_dot's signature.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_param) {
          // Parameter gradients accumulate (+=); bottom gradients overwrite.
          Dtype result;
          caffe_gpu_dot(inner_dim_, product, sum_mult, &result);
          *scalar_diff += result;
        } else {
          caffe_gpu_dot(inner_dim_, product, sum_mult, scalar_diff);
        }
      } else {
        // Reduce over inner_dim_ with a matrix-vector product against the
        // ones vector; if there is no outer dimension, write straight into
        // the scalar diff, otherwise into scratch for the second reduction.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        sum_result = (outer_dim_ == 1) ?
            scalar->mutable_gpu_diff() : sum_result_.mutable_gpu_data();
        caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        // Second reduction: collapse the outer dimension.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        if (scalar_dim_ == 1) {
          // Dot-product result goes to a host pointer, hence cpu diff here.
          Dtype* scalar_diff = scalar->mutable_cpu_diff();
          if (scalar_param) {
            Dtype result;
            caffe_gpu_dot(outer_dim_, sum_mult, sum_result, &result);
            *scalar_diff += result;
          } else {
            caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scalar_diff);
          }
        } else {
          // beta = 1 when the scalar is a parameter, to accumulate into the
          // existing diff; beta = 0 otherwise, to overwrite it.
          Dtype* scalar_diff = scalar->mutable_gpu_diff();
          caffe_gpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scalar_param),
                         scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    // Gradient w.r.t. bottom[0] is the top diff scaled by the broadcast
    // scalar -- reuse the Forward kernel on the diffs.
    const int count = top[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    const Dtype* scalar_data = scalar->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
        <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, scalar_data, scalar_dim_, inner_dim_, bottom_diff);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ScalarLayer);

} // namespace caffe
Loading