15 changes: 8 additions & 7 deletions examples/cifar10/cifar10_full_test.prototxt
@@ -166,16 +166,17 @@ layers {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "ip1"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "ip1"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "ip1"
+bottom: "label"
+top: "loss"
+}
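The same substitution repeats in the test prototxts that follow: the standalone SOFTMAX layer and its intermediate "prob" blob are dropped, ACCURACY reads the raw classifier output (here ip1) directly, and a SOFTMAX_LOSS layer forms the probabilities internally from those same scores plus the labels. As a rough illustration of what the fused layer consumes, a minimal standalone sketch in plain C++ (the function name softmax_nll and its types are made up for this note; this is not Caffe code):

#include <algorithm>
#include <cmath>
#include <vector>

// Sketch: a fused softmax-with-loss needs only the raw scores and the
// ground-truth label; the probabilities are formed internally, so no
// separate "prob" blob has to exist in the network.
double softmax_nll(const std::vector<double>& scores, int label) {
  // Shift by the max score before exponentiating for numerical stability.
  const double max_score = *std::max_element(scores.begin(), scores.end());
  double sum = 0.0;
  for (double s : scores) sum += std::exp(s - max_score);
  // -log(softmax(scores)[label]), expressed directly in terms of the scores.
  return -(scores[label] - max_score - std::log(sum));
}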
15 changes: 8 additions & 7 deletions examples/cifar10/cifar10_quick_test.prototxt
@@ -160,16 +160,17 @@ layers {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "ip2"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "ip2"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "ip2"
+bottom: "label"
+top: "loss"
+}
7 changes: 7 additions & 0 deletions examples/feature_extraction/imagenet_val.prototxt
@@ -227,3 +227,10 @@ layers {
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "fc8"
+bottom: "label"
+top: "loss"
+}
15 changes: 8 additions & 7 deletions examples/imagenet/alexnet_val.prototxt
@@ -213,15 +213,16 @@ layers {
 top: "fc8"
 }
 layers {
-name: "prob"
-type: SOFTMAX
+name: "accuracy"
+type: ACCURACY
 bottom: "fc8"
-top: "prob"
+bottom: "label"
+top: "accuracy"
 }
 layers {
-top: "accuracy"
-name: "accuracy"
-type: ACCURACY
-bottom: "prob"
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "fc8"
 bottom: "label"
+top: "loss"
 }
15 changes: 8 additions & 7 deletions examples/imagenet/imagenet_val.prototxt
@@ -212,16 +212,17 @@ layers {
 num_output: 1000
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "fc8"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "fc8"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "fc8"
+bottom: "label"
+top: "loss"
+}
30 changes: 16 additions & 14 deletions examples/mnist/lenet_consolidated_solver.prototxt
@@ -262,19 +262,20 @@ test_net_param {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "ip2"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "ip2"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "ip2"
+bottom: "label"
+top: "loss"
+}
 }
 
 # The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
@@ -385,19 +386,20 @@ test_net_param {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "ip2"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "ip2"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "ip2"
+bottom: "label"
+top: "loss"
+}
 }
 
 # Expected results for first and last 500 iterations:
15 changes: 8 additions & 7 deletions examples/mnist/lenet_test.prototxt
@@ -102,16 +102,17 @@ layers {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "ip2"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "ip2"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "loss"
+type: SOFTMAX_LOSS
+bottom: "ip2"
+bottom: "label"
+top: "loss"
+}
1 change: 1 addition & 0 deletions examples/mnist/mnist_autoencoder_test.prototxt
@@ -142,4 +142,5 @@ layers {
 bottom: "flatdata"
 name: "loss"
 type: EUCLIDEAN_LOSS
+top: "loss"
 }
16 changes: 9 additions & 7 deletions examples/pascal-finetuning/pascal_finetune_val.prototxt
@@ -313,16 +313,18 @@ layers {
 }
 }
 }
-layers {
-name: "prob"
-type: SOFTMAX
-bottom: "fc8_pascal"
-top: "prob"
-}
 layers {
 name: "accuracy"
 type: ACCURACY
-bottom: "prob"
+bottom: "fc8_pascal"
 bottom: "label"
 top: "accuracy"
 }
+layers {
+name: "prob"
+type: SOFTMAX_LOSS
+bottom: "fc8_pascal"
+bottom: "label"
+top: "loss"
+}
+
50 changes: 48 additions & 2 deletions include/caffe/loss_layers.hpp
@@ -37,7 +37,50 @@ class LossLayer : public Layer<Dtype> {
 const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {}
 
 virtual inline int ExactNumBottomBlobs() const { return 2; }
-virtual inline int ExactNumTopBlobs() const { return 0; }
+virtual inline int MaxTopBlobs() const { return 1; }
 };
 
+// Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer.
+template <typename Dtype> class SoftmaxLayer;
+
+/* SoftmaxWithLossLayer
+Implements softmax and computes the loss.
+
+It is preferred over separate softmax + multinomiallogisticloss
+layers due to more numerically stable gradients.
+
+In test, this layer could be replaced by simple softmax layer.
+*/
+template <typename Dtype>
+class SoftmaxWithLossLayer : public Layer<Dtype> {
+public:
+explicit SoftmaxWithLossLayer(const LayerParameter& param)
+: Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
+virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+vector<Blob<Dtype>*>* top);
+
+virtual inline LayerParameter_LayerType type() const {
+return LayerParameter_LayerType_SOFTMAX_LOSS;
+}
+virtual inline int ExactNumBottomBlobs() const { return 2; }
+virtual inline int MaxTopBlobs() const { return 2; }
+
+protected:
+virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+vector<Blob<Dtype>*>* top);
+virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+vector<Blob<Dtype>*>* top);
+virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
+// prob stores the output probability of the layer.
+Blob<Dtype> prob_;
+// Vector holders to call the underlying softmax layer forward and backward.
+vector<Blob<Dtype>*> softmax_bottom_vec_;
+vector<Blob<Dtype>*> softmax_top_vec_;
+};
+
 /* SigmoidCrossEntropyLossLayer
@@ -166,7 +209,7 @@ class MultinomialLogisticLossLayer : public LossLayer<Dtype> {
 
 /* AccuracyLayer
 Note: not an actual loss layer! Does not implement backwards step.
-Computes the accuracy and logprob of a with respect to b.
+Computes the accuracy of argmax(a) with respect to b.
 */
 template <typename Dtype>
 class AccuracyLayer : public Layer<Dtype> {
@@ -180,6 +223,9 @@ class AccuracyLayer : public Layer<Dtype> {
 return LayerParameter_LayerType_ACCURACY;
 }
 
+virtual inline int ExactNumBottomBlobs() const { return 2; }
+virtual inline int ExactNumTopBlobs() const { return 1; }
+
 protected:
 virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 vector<Blob<Dtype>*>* top);
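The comment carried over with the class ("more numerically stable gradients") comes down to the backward pass: when softmax and the log-loss are fused, the gradient with respect to the raw scores is simply the predicted probabilities minus the one-hot target, so nothing is ever divided by a near-zero probability. A small standalone illustration of that gradient in plain C++ (hypothetical function name, not the Caffe implementation):

#include <vector>

// Gradient of -log(softmax(x)[label]) with respect to the raw scores x:
// copy the probabilities and subtract 1 at the target index. No division
// by prob[label] appears, which is the stability argument for fusing the
// two layers rather than chaining softmax into a multinomial logistic loss.
void softmax_loss_grad(const std::vector<double>& prob, int label,
                       std::vector<double>* grad) {
  grad->assign(prob.begin(), prob.end());
  (*grad)[label] -= 1.0;
}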
40 changes: 0 additions & 40 deletions include/caffe/vision_layers.hpp
@@ -405,46 +405,6 @@ class SoftmaxLayer : public Layer<Dtype> {
 Blob<Dtype> scale_;
 };
 
-/* SoftmaxWithLossLayer
-Implements softmax and computes the loss.
-
-It is preferred over separate softmax + multinomiallogisticloss
-layers due to more numerically stable gradients.
-
-In test, this layer could be replaced by simple softmax layer.
-*/
-template <typename Dtype>
-class SoftmaxWithLossLayer : public Layer<Dtype> {
-public:
-explicit SoftmaxWithLossLayer(const LayerParameter& param)
-: Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
-virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
-vector<Blob<Dtype>*>* top);
-
-virtual inline LayerParameter_LayerType type() const {
-return LayerParameter_LayerType_SOFTMAX_LOSS;
-}
-virtual inline int ExactNumBottomBlobs() const { return 2; }
-virtual inline int ExactNumTopBlobs() const { return 0; }
-
-protected:
-virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-vector<Blob<Dtype>*>* top);
-virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-vector<Blob<Dtype>*>* top);
-virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
-const bool propagate_down, vector<Blob<Dtype>*>* bottom);
-virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
-const bool propagate_down, vector<Blob<Dtype>*>* bottom);
-
-shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
-// prob stores the output probability of the layer.
-Blob<Dtype> prob_;
-// Vector holders to call the underlying softmax layer forward and backward.
-vector<Blob<Dtype>*> softmax_bottom_vec_;
-vector<Blob<Dtype>*> softmax_top_vec_;
-};
-
 /* SplitLayer
 */
 template <typename Dtype>
8 changes: 2 additions & 6 deletions src/caffe/layers/accuracy_layer.cpp
@@ -23,14 +23,13 @@ void AccuracyLayer<Dtype>::SetUp(
 CHECK_EQ(bottom[1]->channels(), 1);
 CHECK_EQ(bottom[1]->height(), 1);
 CHECK_EQ(bottom[1]->width(), 1);
-(*top)[0]->Reshape(1, 2, 1, 1);
+(*top)[0]->Reshape(1, 1, 1, 1);
 }
 
 template <typename Dtype>
 Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 vector<Blob<Dtype>*>* top) {
 Dtype accuracy = 0;
-Dtype logprob = 0;
 const Dtype* bottom_data = bottom[0]->cpu_data();
 const Dtype* bottom_label = bottom[1]->cpu_data();
 int num = bottom[0]->num();
@@ -48,13 +47,10 @@ Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 if (max_id == static_cast<int>(bottom_label[i])) {
 ++accuracy;
 }
-Dtype prob = max(bottom_data[i * dim + static_cast<int>(bottom_label[i])],
-Dtype(kLOG_THRESHOLD));
-logprob -= log(prob);
 }
 // LOG(INFO) << "Accuracy: " << accuracy;
 (*top)[0]->mutable_cpu_data()[0] = accuracy / num;
-(*top)[0]->mutable_cpu_data()[1] = logprob / num;
+
 // Accuracy layer should not be used as a loss function.
 return Dtype(0);
 }
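With the logprob half removed, the forward pass of the accuracy layer reduces to an argmax match count averaged over the batch, written into the single remaining top value. A condensed, self-contained sketch of that computation (the names mirror the diff above, but this is an illustration rather than the verbatim file):

#include <vector>

// Condensed sketch of what the accuracy forward pass still computes after
// this change: per-image argmax over dim class scores, compared against the
// integer label, averaged over num images.
double batch_accuracy(const std::vector<double>& scores,  // num * dim, row-major
                      const std::vector<int>& labels, int num, int dim) {
  int correct = 0;
  for (int i = 0; i < num; ++i) {
    int max_id = 0;
    for (int j = 1; j < dim; ++j) {
      if (scores[i * dim + j] > scores[i * dim + max_id]) max_id = j;
    }
    if (max_id == labels[i]) ++correct;
  }
  // This ratio is the single value now written to (*top)[0].
  return static_cast<double>(correct) / num;
}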
3 changes: 3 additions & 0 deletions src/caffe/layers/euclidean_loss_layer.cpp
@@ -35,6 +35,9 @@ Dtype EuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 diff_.mutable_cpu_data());
 Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
 Dtype loss = dot / bottom[0]->num() / Dtype(2);
+if (top->size() == 1) {
+(*top)[0]->mutable_cpu_data()[0] = loss;
+}
 return loss;
 }
 
3 changes: 3 additions & 0 deletions src/caffe/layers/infogain_loss_layer.cpp
@@ -48,6 +48,9 @@ Dtype InfogainLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 loss -= infogain_mat[label * dim + j] * log(prob);
 }
 }
+if (top->size() == 1) {
+(*top)[0]->mutable_cpu_data()[0] = loss / num;
+}
 return loss / num;
 }
 
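The Euclidean and infogain changes above follow one pattern: the scalar loss is still returned from Forward_cpu, and it is additionally copied into a top blob whenever the net wires one up, which is what the new top: "loss" lines in the prototxts request. A distilled version of that guard in plain C++ (hypothetical free function and signature, not the Caffe layer API):

#include <cstddef>
#include <vector>

// Distilled pattern from the loss-layer changes above: compute the loss,
// publish it through an optional output slot if the caller provided one,
// and return it either way so existing callers keep working.
double euclidean_forward(const std::vector<double>& pred,
                         const std::vector<double>& target, int num,
                         std::vector<double*>* top /* may be empty */) {
  double dot = 0.0;
  for (std::size_t k = 0; k < pred.size(); ++k) {
    const double d = pred[k] - target[k];
    dot += d * d;
  }
  const double loss = dot / num / 2.0;
  if (top->size() == 1) {
    *(*top)[0] = loss;  // mirror the scalar into the requested top blob
  }
  return loss;
}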