diff --git a/examples/cifar10/cifar10_full_test.prototxt b/examples/cifar10/cifar10_full_test.prototxt
index 0e1957a9045..1f77b4f0348 100644
--- a/examples/cifar10/cifar10_full_test.prototxt
+++ b/examples/cifar10/cifar10_full_test.prototxt
@@ -166,16 +166,17 @@ layers {
     }
   }
 }
-layers {
-  name: "prob"
-  type: SOFTMAX
-  bottom: "ip1"
-  top: "prob"
-}
 layers {
   name: "accuracy"
   type: ACCURACY
-  bottom: "prob"
+  bottom: "ip1"
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "ip1"
+  bottom: "label"
+  top: "loss"
+}
diff --git a/examples/cifar10/cifar10_quick_test.prototxt b/examples/cifar10/cifar10_quick_test.prototxt
index a154b9a0ea7..aa82c32aa24 100644
--- a/examples/cifar10/cifar10_quick_test.prototxt
+++ b/examples/cifar10/cifar10_quick_test.prototxt
@@ -160,16 +160,17 @@ layers {
     }
   }
 }
-layers {
-  name: "prob"
-  type: SOFTMAX
-  bottom: "ip2"
-  top: "prob"
-}
 layers {
   name: "accuracy"
   type: ACCURACY
-  bottom: "prob"
+  bottom: "ip2"
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "ip2"
+  bottom: "label"
+  top: "loss"
+}
diff --git a/examples/feature_extraction/imagenet_val.prototxt b/examples/feature_extraction/imagenet_val.prototxt
index 14bfe770ef8..b0451a1a114 100644
--- a/examples/feature_extraction/imagenet_val.prototxt
+++ b/examples/feature_extraction/imagenet_val.prototxt
@@ -227,3 +227,10 @@ layers {
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8"
+  bottom: "label"
+  top: "loss"
+}
diff --git a/examples/imagenet/alexnet_val.prototxt b/examples/imagenet/alexnet_val.prototxt
index 3fd6296ef9d..1d8d86b78ff 100644
--- a/examples/imagenet/alexnet_val.prototxt
+++ b/examples/imagenet/alexnet_val.prototxt
@@ -213,15 +213,16 @@ layers {
   top: "fc8"
 }
 layers {
-  name: "prob"
-  type: SOFTMAX
+  name: "accuracy"
+  type: ACCURACY
   bottom: "fc8"
-  top: "prob"
+  bottom: "label"
+  top: "accuracy"
 }
 layers {
-  top: "accuracy"
-  name: "accuracy"
-  type: ACCURACY
-  bottom: "prob"
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8"
   bottom: "label"
+  top: "loss"
 }
diff --git a/examples/imagenet/imagenet_val.prototxt b/examples/imagenet/imagenet_val.prototxt
index dd26f40ea14..8be5150cdd2 100644
--- a/examples/imagenet/imagenet_val.prototxt
+++ b/examples/imagenet/imagenet_val.prototxt
@@ -212,16 +212,17 @@ layers {
     num_output: 1000
   }
 }
-layers {
-  name: "prob"
-  type: SOFTMAX
-  bottom: "fc8"
-  top: "prob"
-}
 layers {
   name: "accuracy"
   type: ACCURACY
-  bottom: "prob"
+  bottom: "fc8"
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8"
+  bottom: "label"
+  top: "loss"
+}
\ No newline at end of file
diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt
index 07cbc211414..ef851e0f656 100644
--- a/examples/mnist/lenet_consolidated_solver.prototxt
+++ b/examples/mnist/lenet_consolidated_solver.prototxt
@@ -262,19 +262,20 @@ test_net_param {
       }
     }
   }
-  layers {
-    name: "prob"
-    type: SOFTMAX
-    bottom: "ip2"
-    top: "prob"
-  }
   layers {
     name: "accuracy"
     type: ACCURACY
-    bottom: "prob"
+    bottom: "ip2"
     bottom: "label"
     top: "accuracy"
   }
+  layers {
+    name: "loss"
+    type: SOFTMAX_LOSS
+    bottom: "ip2"
+    bottom: "label"
+    top: "loss"
+  }
 }

 # The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
@@ -385,19 +386,20 @@ test_net_param {
       }
     }
   }
-  layers {
-    name: "prob"
-    type: SOFTMAX
-    bottom: "ip2"
-    top: "prob"
-  }
   layers {
     name: "accuracy"
     type: ACCURACY
-    bottom: "prob"
+    bottom: "ip2"
     bottom: "label"
     top: "accuracy"
   }
+  layers {
+    name: "loss"
+    type: SOFTMAX_LOSS
+    bottom: "ip2"
+    bottom: "label"
+    top: "loss"
+  }
 }

 # Expected results for first and last 500 iterations:
diff --git a/examples/mnist/lenet_test.prototxt b/examples/mnist/lenet_test.prototxt
index 3b59b75513d..2497f02ae86 100644
--- a/examples/mnist/lenet_test.prototxt
+++ b/examples/mnist/lenet_test.prototxt
@@ -102,16 +102,17 @@ layers {
     }
   }
 }
-layers {
-  name: "prob"
-  type: SOFTMAX
-  bottom: "ip2"
-  top: "prob"
-}
 layers {
   name: "accuracy"
   type: ACCURACY
-  bottom: "prob"
+  bottom: "ip2"
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "ip2"
+  bottom: "label"
+  top: "loss"
+}
diff --git a/examples/mnist/mnist_autoencoder_test.prototxt b/examples/mnist/mnist_autoencoder_test.prototxt
index 5090e82fe0a..b52364c17fc 100644
--- a/examples/mnist/mnist_autoencoder_test.prototxt
+++ b/examples/mnist/mnist_autoencoder_test.prototxt
@@ -142,4 +142,5 @@ layers {
   bottom: "flatdata"
   name: "loss"
   type: EUCLIDEAN_LOSS
+  top: "loss"
 }
diff --git a/examples/pascal-finetuning/pascal_finetune_val.prototxt b/examples/pascal-finetuning/pascal_finetune_val.prototxt
index ff898fe7376..91ded585d85 100644
--- a/examples/pascal-finetuning/pascal_finetune_val.prototxt
+++ b/examples/pascal-finetuning/pascal_finetune_val.prototxt
@@ -313,16 +313,18 @@ layers {
     }
   }
 }
-layers {
-  name: "prob"
-  type: SOFTMAX
-  bottom: "fc8_pascal"
-  top: "prob"
-}
 layers {
   name: "accuracy"
   type: ACCURACY
-  bottom: "prob"
+  bottom: "fc8_pascal"
   bottom: "label"
   top: "accuracy"
 }
+layers {
+  name: "loss"
+  type: SOFTMAX_LOSS
+  bottom: "fc8_pascal"
+  bottom: "label"
+  top: "loss"
+}
+
diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index 381bf0f4f8f..db7c63edca1 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -37,7 +37,50 @@ class LossLayer : public Layer<Dtype> {
       const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {}

   virtual inline int ExactNumBottomBlobs() const { return 2; }
-  virtual inline int ExactNumTopBlobs() const { return 0; }
+  virtual inline int MaxTopBlobs() const { return 1; }
+};
+
+// Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer.
+template <typename Dtype> class SoftmaxLayer;
+
+/* SoftmaxWithLossLayer
+  Implements softmax and computes the loss.
+
+  It is preferred over separate softmax + multinomiallogisticloss
+  layers due to more numerically stable gradients.
+
+  In test, this layer could be replaced by simple softmax layer.
+*/
+template <typename Dtype>
+class SoftmaxWithLossLayer : public Layer<Dtype> {
+ public:
+  explicit SoftmaxWithLossLayer(const LayerParameter& param)
+      : Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual inline LayerParameter_LayerType type() const {
+    return LayerParameter_LayerType_SOFTMAX_LOSS;
+  }
+  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int MaxTopBlobs() const { return 2; }
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
+  // prob stores the output probability of the layer.
+  Blob<Dtype> prob_;
+  // Vector holders to call the underlying softmax layer forward and backward.
+  vector<Blob<Dtype>*> softmax_bottom_vec_;
+  vector<Blob<Dtype>*> softmax_top_vec_;
 };

 /* SigmoidCrossEntropyLossLayer
@@ -166,7 +209,7 @@ class MultinomialLogisticLossLayer : public LossLayer<Dtype> {

 /* AccuracyLayer
   Note: not an actual loss layer! Does not implement backwards step.
-  Computes the accuracy and logprob of a with respect to b.
+  Computes the accuracy of argmax(a) with respect to b.
 */
 template <typename Dtype>
 class AccuracyLayer : public Layer<Dtype> {
@@ -180,6 +223,9 @@ class AccuracyLayer : public Layer<Dtype> {
     return LayerParameter_LayerType_ACCURACY;
   }

+  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
  protected:
   virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top);
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index fc3dbbe1938..3fd7e2f8bdb 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -405,46 +405,6 @@ class SoftmaxLayer : public Layer<Dtype> {
   Blob<Dtype> scale_;
 };

-/* SoftmaxWithLossLayer
-  Implements softmax and computes the loss.
-
-  It is preferred over separate softmax + multinomiallogisticloss
-  layers due to more numerically stable gradients.
-
-  In test, this layer could be replaced by simple softmax layer.
-*/
-template <typename Dtype>
-class SoftmaxWithLossLayer : public Layer<Dtype> {
- public:
-  explicit SoftmaxWithLossLayer(const LayerParameter& param)
-      : Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
-  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
-      vector<Blob<Dtype>*>* top);
-
-  virtual inline LayerParameter_LayerType type() const {
-    return LayerParameter_LayerType_SOFTMAX_LOSS;
-  }
-  virtual inline int ExactNumBottomBlobs() const { return 2; }
-  virtual inline int ExactNumTopBlobs() const { return 0; }
-
- protected:
-  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-      vector<Blob<Dtype>*>* top);
-  virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-      vector<Blob<Dtype>*>* top);
-  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
-      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
-  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
-      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
-
-  shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
-  // prob stores the output probability of the layer.
-  Blob<Dtype> prob_;
-  // Vector holders to call the underlying softmax layer forward and backward.
-  vector<Blob<Dtype>*> softmax_bottom_vec_;
-  vector<Blob<Dtype>*> softmax_top_vec_;
-};
-
 /* SplitLayer
 */
 template <typename Dtype>
diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp
index fbc943eaf2d..899750f869a 100644
--- a/src/caffe/layers/accuracy_layer.cpp
+++ b/src/caffe/layers/accuracy_layer.cpp
@@ -23,14 +23,13 @@ void AccuracyLayer<Dtype>::SetUp(
   CHECK_EQ(bottom[1]->channels(), 1);
   CHECK_EQ(bottom[1]->height(), 1);
   CHECK_EQ(bottom[1]->width(), 1);
-  (*top)[0]->Reshape(1, 2, 1, 1);
+  (*top)[0]->Reshape(1, 1, 1, 1);
 }

 template <typename Dtype>
 Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
   Dtype accuracy = 0;
-  Dtype logprob = 0;
   const Dtype* bottom_data = bottom[0]->cpu_data();
   const Dtype* bottom_label = bottom[1]->cpu_data();
   int num = bottom[0]->num();
@@ -48,13 +47,10 @@ Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     if (max_id == static_cast<int>(bottom_label[i])) {
       ++accuracy;
     }
-    Dtype prob = max(bottom_data[i * dim + static_cast<int>(bottom_label[i])],
-                     Dtype(kLOG_THRESHOLD));
-    logprob -= log(prob);
   }
   // LOG(INFO) << "Accuracy: " << accuracy;
   (*top)[0]->mutable_cpu_data()[0] = accuracy / num;
-  (*top)[0]->mutable_cpu_data()[1] = logprob / num;
+  // Accuracy layer should not be used as a loss function.
   return Dtype(0);
 }
diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp
index a894d470c64..766294997f0 100644
--- a/src/caffe/layers/euclidean_loss_layer.cpp
+++ b/src/caffe/layers/euclidean_loss_layer.cpp
@@ -35,6 +35,9 @@ Dtype EuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       diff_.mutable_cpu_data());
   Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
   Dtype loss = dot / bottom[0]->num() / Dtype(2);
+  if (top->size() == 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss;
+  }
   return loss;
 }
diff --git a/src/caffe/layers/infogain_loss_layer.cpp b/src/caffe/layers/infogain_loss_layer.cpp
index ab6e67d73b1..3e7fc4f812b 100644
--- a/src/caffe/layers/infogain_loss_layer.cpp
+++ b/src/caffe/layers/infogain_loss_layer.cpp
@@ -48,6 +48,9 @@ Dtype InfogainLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       loss -= infogain_mat[label * dim + j] * log(prob);
     }
   }
+  if (top->size() == 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  }
   return loss / num;
 }
diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp
index 14ea975ad0d..ac8ad216732 100644
--- a/src/caffe/layers/loss_layer.cpp
+++ b/src/caffe/layers/loss_layer.cpp
@@ -20,6 +20,10 @@ void LossLayer<Dtype>::SetUp(
   Layer<Dtype>::SetUp(bottom, top);
   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
       << "The data and label should have the same number.";
+  if (top->size() == 1) {
+    // Loss layers should copy the loss into the top blob.
+    (*top)[0]->Reshape(1, 1, 1, 1);
+  }
   FurtherSetUp(bottom, top);
 }
diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
index 6486621d8aa..5a408795d6d 100644
--- a/src/caffe/layers/multinomial_logistic_loss_layer.cpp
+++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
@@ -35,6 +35,9 @@ Dtype MultinomialLogisticLossLayer<Dtype>::Forward_cpu(
     Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD));
     loss -= log(prob);
   }
+  if (top->size() == 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  }
   return loss / num;
 }
diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
index a638684f3b6..955581d8875 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
@@ -41,6 +41,9 @@ Dtype SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
     loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
         log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
   }
+  if (top->size() == 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  }
   return loss / num;
 }
diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
index 61004541fce..0caed2b83bd 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
@@ -29,6 +29,9 @@ Dtype SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
     loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
         log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
   }
+  if (top->size() == 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  }
   return loss / num;
 }
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index ef6eebabadd..bdb3272ee25 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -20,6 +20,15 @@ void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   softmax_bottom_vec_.push_back(bottom[0]);
   softmax_top_vec_.push_back(&prob_);
   softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
+  if (top->size() >= 1) {
+    // softmax loss (averaged across batch)
+    (*top)[0]->Reshape(1, 1, 1, 1);
+  }
+  if (top->size() == 2) {
+    // softmax output
+    (*top)[1]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+        bottom[0]->height(), bottom[0]->width());
+  }
 }

 template <typename Dtype>
@@ -37,6 +46,12 @@ Dtype SoftmaxWithLossLayer<Dtype>::Forward_cpu(
     loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])],
                      Dtype(FLT_MIN)));
   }
+  if (top->size() >= 1) {
+    (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  }
+  if (top->size() == 2) {
+    (*top)[1]->ShareData(prob_);
+  }
   return loss / num;
 }
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 1e43a038455..7f2f67b59c6 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -64,6 +64,7 @@ class NetTest : public ::testing::Test {
         "  type: SOFTMAX_LOSS "
         "  bottom: 'innerproduct' "
         "  bottom: 'label' "
+        "  top: 'top_loss' "
         "} ";
     NetParameter param;
     CHECK(google::protobuf::TextFormat::ParseFromString(proto, &param));
@@ -81,6 +82,7 @@ TYPED_TEST(NetTest, TestHasBlob) {
   EXPECT_TRUE(this->net_->has_blob("label"));
   EXPECT_TRUE(this->net_->has_blob("innerproduct"));
   EXPECT_FALSE(this->net_->has_blob("loss"));
+  EXPECT_TRUE(this->net_->has_blob("top_loss"));
 }

 TYPED_TEST(NetTest, TestGetBlob) {
@@ -88,6 +90,7 @@ TYPED_TEST(NetTest, TestGetBlob) {
   EXPECT_EQ(this->net_->blob_by_name("label"), this->net_->blobs()[1]);
   EXPECT_EQ(this->net_->blob_by_name("innerproduct"), this->net_->blobs()[2]);
   EXPECT_FALSE(this->net_->blob_by_name("loss"));
+  EXPECT_EQ(this->net_->blob_by_name("top_loss"), this->net_->blobs()[3]);
 }

 TYPED_TEST(NetTest, TestHasLayer) {
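With this change a loss layer can expose its scalar loss through an optional top blob, which is what the updated example prototxts above rely on: the accuracy layer now reads the raw scores directly, and a SOFTMAX_LOSS layer reports the loss. Below is a minimal sketch of the resulting tail of a test net, reusing the "ip2" and "label" blob names from the MNIST LeNet examples in this diff:

layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "ip2"      # raw scores from the last inner product layer
  bottom: "label"
  top: "accuracy"    # fraction of correctly classified examples
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "ip2"      # same raw scores; the softmax is applied internally
  bottom: "label"
  top: "loss"        # optional top blob holding the batch-averaged softmax loss
}

Omitting the top: "loss" line is still valid: the layer then computes and returns the loss internally without exposing it as a blob, as before this change.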