Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions include/caffe/layers/scalar_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Fix: the include guard was CAFFE_INNER_PRODUCT_LAYER_HPP_, copy-pasted from
// inner_product_layer.hpp; a TU including both headers would silently skip one.
#ifndef CAFFE_SCALAR_LAYER_HPP_
#define CAFFE_SCALAR_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Computes a product of two input Blobs, with the shape of the
 *        latter Blob "broadcast" to match the shape of the former.
 *        Equivalent to tiling the latter Blob, then computing the elementwise
 *        product.
 *
 * The second input may be omitted, in which case it's learned as a parameter
 * of the layer.
 */
template <typename Dtype>
class ScalarLayer: public Layer<Dtype> {
 public:
  explicit ScalarLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Scalar"; }
  // The second bottom (the scalar) is optional; when omitted, the scalar is
  // a learned parameter of the layer (see LayerSetUp).
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  /**
   * In the below shape specifications, @f$ i @f$ denotes the value of the
   * `axis` field given by `this->layer_param_.scalar_param().axis()`, after
   * canonicalization (i.e., conversion from negative to positive index,
   * if applicable).
   *
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the first factor @f$ x @f$
   *   -# @f$ (d_i \times ... \times d_j) @f$
   *      the second factor @f$ y @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the product @f$ z = x y @f$ computed after "broadcasting" y.
   *      Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
   *      then computing the elementwise product.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Vector of ones used to reduce gradients over outer_dim_/inner_dim_ via
  // dot products and gemv calls in Backward; sized max(outer_dim_, inner_dim_).
  Blob<Dtype> sum_multiplier_;
  // Scratch space for the intermediate (outer_dim_ * scalar_dim_) partial
  // sums computed in Backward.
  Blob<Dtype> sum_result_;
  // Copy of the input saved by Forward when computing in place, so Backward
  // can still read the original bottom data.
  Blob<Dtype> temp_;
  // Canonicalized first axis of bottom[0] that the scalar blob applies to.
  int axis_;
  // bottom[0] is treated as (outer_dim_ x scalar_dim_ x inner_dim_), with the
  // scalar broadcast along the middle factor (set up in Reshape).
  int outer_dim_, scalar_dim_, inner_dim_;
};


}  // namespace caffe

#endif  // CAFFE_SCALAR_LAYER_HPP_
187 changes: 187 additions & 0 deletions src/caffe/layers/scalar_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#include <algorithm>
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/scalar_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ScalarLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (bottom.size() == 1 && this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else if (bottom.size() == 1) {
    // No scalar bottom was supplied, so the scalar is a learned parameter of
    // this layer; create and fill it here.
    const ScalarParameter& param = this->layer_param_.scalar_param();
    axis_ = bottom[0]->CanonicalAxisIndex(param.axis());
    const int num_axes = param.num_axes();
    CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
                           << "or -1 to extend to the end of bottom[0]";
    if (num_axes >= 0) {
      CHECK_GE(bottom[0]->num_axes(), axis_ + num_axes)
          << "scalar blob's shape extends past bottom[0]'s shape when applied "
          << "starting with bottom[0] axis = " << axis_;
    }
    this->blobs_.resize(1);
    // The parameter's shape is the slice of bottom[0]'s shape covering axes
    // [axis_, axis_ + num_axes), or through the last axis if num_axes == -1.
    const vector<int>& bottom_shape = bottom[0]->shape();
    const int end_axis =
        (num_axes == -1) ? bottom[0]->num_axes() : axis_ + num_axes;
    const vector<int> scalar_shape(bottom_shape.begin() + axis_,
                                   bottom_shape.begin() + end_axis);
    this->blobs_[0].reset(new Blob<Dtype>(scalar_shape));
    FillerParameter filler_param(param.filler());
    if (!param.has_filler()) {
      // With no explicit filler, initialize to all ones so the layer starts
      // out as the identity operation.
      filler_param.set_type("constant");
      filler_param.set_value(1);
    }
    shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(filler_param));
    filler->Fill(this->blobs_[0].get());
  }
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ScalarParameter& param = this->layer_param_.scalar_param();
  // The scalar is either the (optional) second bottom or the learned
  // parameter blob created in LayerSetUp.
  Blob<Dtype>* scalar = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  // Always set axis_ == 0 in special case where scalar is an actual scalar
  // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the
  // actual setting can be safely ignored; and computation is most efficient
  // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to
  // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally
  // performant.)
  axis_ = (scalar->num_axes() == 0) ?
      0 : bottom[0]->CanonicalAxisIndex(param.axis());
  CHECK_GE(bottom[0]->num_axes(), axis_ + scalar->num_axes())
      << "scalar blob's shape extends past bottom[0]'s shape when applied "
      << "starting with bottom[0] axis = " << axis_;
  // Every axis of the scalar must exactly match the corresponding axis of
  // bottom[0], starting at axis_.
  for (int i = 0; i < scalar->num_axes(); ++i) {
    CHECK_EQ(bottom[0]->shape(axis_ + i), scalar->shape(i))
        << "dimension mismatch between bottom[0]->shape(" << axis_ + i
        << ") and scalar->shape(" << i << ")";
  }
  // Factor bottom[0]'s shape as (outer_dim_ x scalar_dim_ x inner_dim_); the
  // scalar is broadcast along the middle factor.
  outer_dim_ = bottom[0]->count(0, axis_);
  scalar_dim_ = scalar->count();
  inner_dim_ = bottom[0]->count(axis_ + scalar->num_axes());
  if (bottom[0] == top[0]) {  // in-place computation
    // temp_ will hold a copy of the input (saved by Forward) so Backward can
    // still read the original bottom data.
    temp_.ReshapeLike(*bottom[0]);
  } else {
    top[0]->ReshapeLike(*bottom[0]);
  }
  // Scratch space for Backward's partial sums, plus a ones-vector multiplier
  // large enough for reductions over either outer_dim_ or inner_dim_.
  sum_result_.Reshape(vector<int>(1, outer_dim_ * scalar_dim_));
  const int sum_mult_size = std::max(outer_dim_, inner_dim_);
  sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
  // Only (re)fill with ones when needed; if the last element is already 1 the
  // buffer is assumed to have been filled by a previous Reshape.
  if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) {
    caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data());
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  if (bottom[0] == top[0]) {
    // In-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
               temp_.mutable_cpu_data());
  }
  // Use the second bottom as the scalar when given; otherwise the learned one.
  const Dtype* scalar_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  // Scale each inner_dim_-sized slab by its broadcast scalar coefficient.
  for (int outer = 0; outer < outer_dim_; ++outer) {
    for (int s = 0; s < scalar_dim_; ++s) {
      const int offset = (outer * scalar_dim_ + s) * inner_dim_;
      caffe_cpu_scale(inner_dim_, scalar_data[s],
                      bottom_data + offset, top_data + offset);
    }
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // scalar_param: the scalar is this layer's learned parameter (one bottom)
  // rather than a second input blob.
  const bool scalar_param = (bottom.size() == 1);
  Blob<Dtype>* scalar = scalar_param ? this->blobs_[0].get() : bottom[1];
  if ((!scalar_param && propagate_down[1]) ||
      (scalar_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    // For in-place computation, Forward saved the original input in temp_.
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scalar->count());
    Dtype* product = (is_eltwise ? scalar->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    // product[i] = top_diff[i] * bottom_data[i]; the scalar gradient is this
    // product reduced over inner_dim_ and outer_dim_.
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        // Nothing to reduce over the inner dimension.
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        // outer_dim_ * scalar_dim_ == 1 (so outer_dim_ == 1 and the final
        // outer reduction below is skipped): one dot product with the ones
        // vector finishes the gradient. Note: parameter gradients accumulate
        // (+=) across calls; bottom gradients are overwritten.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scalar_diff += result;
        } else {
          *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        // Reduce over inner_dim_ with a matrix-vector product against the
        // ones vector; if there is no outer dimension, write straight into
        // the scalar diff, otherwise into scratch for the second reduction.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scalar->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        // Second reduction: collapse the outer dimension.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          if (scalar_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scalar_diff += result;
          } else {
            *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          // beta = 1 when the scalar is a parameter, to accumulate into the
          // existing diff; beta = 0 otherwise, to overwrite it.
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scalar_param),
                         scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    // Gradient w.r.t. bottom[0] is just the top diff scaled by the broadcast
    // scalar -- same computation as Forward, applied to the diffs.
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = scalar->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(ScalarLayer);
#endif

INSTANTIATE_CLASS(ScalarLayer);
REGISTER_LAYER_CLASS(Scalar);

} // namespace caffe
113 changes: 113 additions & 0 deletions src/caffe/layers/scalar_layer.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#include <cfloat>
#include <vector>

#include "caffe/layers/scalar_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Broadcast multiply kernel: `in` is viewed as a flattened
// (outer_dim x scalar_dim x inner_dim) volume, and each element is multiplied
// by the scalar coefficient selected by its middle coordinate.
template <typename Dtype>
__global__ void ScalarForward(const int n, const Dtype* in,
    const Dtype* scalar, const int scalar_dim, const int inner_dim,
    Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    // Middle coordinate of `index` in the (outer, scalar, inner) factoring.
    const int scalar_index = (index / inner_dim) % scalar_dim;
    out[index] = in[index] * scalar[scalar_index];
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  if (bottom[0] == top[0]) {
    // in-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(),
               temp_.mutable_gpu_data());
  }
  // Use the second bottom as the scalar when given; otherwise the learned one.
  const Dtype* scalar_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  // One thread per output element; the kernel does the broadcast multiply.
  ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, scalar_data, scalar_dim_, inner_dim_, top_data);
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // scalar_param: the scalar is this layer's learned parameter (one bottom)
  // rather than a second input blob.
  const bool scalar_param = (bottom.size() == 1);
  Blob<Dtype>* scalar = scalar_param ? this->blobs_[0].get() : bottom[1];
  if ((!scalar_param && propagate_down[1]) ||
      (scalar_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->gpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    // For in-place computation, Forward saved the original input in temp_.
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scalar->count());
    Dtype* product = (is_eltwise ? scalar->mutable_gpu_diff() :
        (in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff()));
    // product[i] = top_diff[i] * bottom_data[i]; the scalar gradient is this
    // product reduced over inner_dim_ and outer_dim_.
    caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        // Nothing to reduce over the inner dimension.
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        // outer_dim_ * scalar_dim_ == 1 (so outer_dim_ == 1 and the final
        // outer reduction below is skipped): one dot product finishes the
        // gradient. caffe_gpu_dot's output argument here is a host-side
        // pointer, hence the cpu (not gpu) diff -- NOTE(review): confirm
        // against caffe_gpu_dot's signature.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        Dtype* scalar_diff = scalar->mutable_cpu_diff();
        if (scalar_param) {
          // Parameter gradients accumulate (+=); bottom gradients overwrite.
          Dtype result;
          caffe_gpu_dot(inner_dim_, product, sum_mult, &result);
          *scalar_diff += result;
        } else {
          caffe_gpu_dot(inner_dim_, product, sum_mult, scalar_diff);
        }
      } else {
        // Reduce over inner_dim_ with a matrix-vector product against the
        // ones vector; if there is no outer dimension, write straight into
        // the scalar diff, otherwise into scratch for the second reduction.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        sum_result = (outer_dim_ == 1) ?
            scalar->mutable_gpu_diff() : sum_result_.mutable_gpu_data();
        caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        // Second reduction: collapse the outer dimension.
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        if (scalar_dim_ == 1) {
          // Dot-product result goes to a host pointer, hence cpu diff here.
          Dtype* scalar_diff = scalar->mutable_cpu_diff();
          if (scalar_param) {
            Dtype result;
            caffe_gpu_dot(outer_dim_, sum_mult, sum_result, &result);
            *scalar_diff += result;
          } else {
            caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scalar_diff);
          }
        } else {
          // beta = 1 when the scalar is a parameter, to accumulate into the
          // existing diff; beta = 0 otherwise, to overwrite it.
          Dtype* scalar_diff = scalar->mutable_gpu_diff();
          caffe_gpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scalar_param),
                         scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    // Gradient w.r.t. bottom[0] is the top diff scaled by the broadcast
    // scalar -- reuse the Forward kernel on the diffs.
    const int count = top[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    const Dtype* scalar_data = scalar->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
        <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, scalar_data, scalar_dim_, inner_dim_, bottom_diff);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ScalarLayer);

} // namespace caffe
Loading