25 changes: 22 additions & 3 deletions src/caffe/layers/contrastive_loss_layer.cpp
@@ -41,14 +41,21 @@ void ContrastiveLossLayer<Dtype>::Forward_cpu(
diff_.mutable_cpu_data()); // a_i-b_i
const int channels = bottom[0]->channels();
Dtype margin = this->layer_param_.contrastive_loss_param().margin();
bool legacy_version =
this->layer_param_.contrastive_loss_param().legacy_version();
Dtype loss(0.0);
for (int i = 0; i < bottom[0]->num(); ++i) {
dist_sq_.mutable_cpu_data()[i] = caffe_cpu_dot(channels,
diff_.cpu_data() + (i*channels), diff_.cpu_data() + (i*channels));
if (static_cast<int>(bottom[2]->cpu_data()[i])) { // similar pairs
loss += dist_sq_.cpu_data()[i];
} else { // dissimilar pairs
loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0));
if (legacy_version) {
loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0));
} else {
Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), Dtype(0.0));
loss += dist*dist;
}
}
}
loss = loss / static_cast<Dtype>(bottom[0]->num()) / Dtype(2);
@@ -59,6 +66,8 @@ template <typename Dtype>
void ContrastiveLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
Dtype margin = this->layer_param_.contrastive_loss_param().margin();
bool legacy_version =
this->layer_param_.contrastive_loss_param().legacy_version();
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
@@ -76,10 +85,20 @@ void ContrastiveLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
Dtype(0.0),
bout + (j*channels));
} else { // dissimilar pairs
if ((margin-dist_sq_.cpu_data()[j]) > Dtype(0.0)) {
Dtype mdist(0.0);
Dtype beta(0.0);
if (legacy_version) {
mdist = margin - dist_sq_.cpu_data()[j];
beta = -alpha;
} else {
Dtype dist = sqrt(dist_sq_.cpu_data()[j]);
mdist = margin - dist;
beta = -alpha * mdist / (dist + Dtype(1e-4));
}
if (mdist > Dtype(0.0)) {
caffe_cpu_axpby(
channels,
-alpha,
beta,
diff_.cpu_data() + (j*channels),
Dtype(0.0),
bout + (j*channels));
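The non-legacy branch above implements the gradient of the squared hinge. Writing d = ||a - b||_2 and mdist = margin - d, for a dissimilar pair with mdist > 0:

\frac{\partial}{\partial a}\,\frac{1}{2}\max(\mathrm{margin} - d,\, 0)^2 = -\frac{\mathrm{margin} - d}{d}\,(a - b)

so each row of the bottom diff is beta * diff with beta = -alpha * mdist / d, where alpha already carries the top diff and the 1/num scaling; the Dtype(1e-4) in the denominator presumably guards against division by zero when the two inputs coincide.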
34 changes: 27 additions & 7 deletions src/caffe/layers/contrastive_loss_layer.cu
@@ -32,30 +32,48 @@ void ContrastiveLossLayer<Dtype>::Forward_gpu(
Dtype(0.0),
dist_sq_.mutable_gpu_data()); // \Sum (a_i-b_i)^2
Dtype margin = this->layer_param_.contrastive_loss_param().margin();
bool legacy_version =
this->layer_param_.contrastive_loss_param().legacy_version();
Dtype loss(0.0);
for (int i = 0; i < bottom[0]->num(); ++i) {
if (static_cast<int>(bottom[2]->cpu_data()[i])) { // similar pairs
loss += dist_sq_.cpu_data()[i];
} else { // dissimilar pairs
loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0));
if (legacy_version) {
loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0));
} else {
Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]),
Dtype(0.0));
loss += dist*dist;
}
}
}
loss = loss / static_cast<Dtype>(bottom[0]->num()) / Dtype(2);
top[0]->mutable_cpu_data()[0] = loss;
}
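Although this is the GPU forward path, the per-pair reduction above still runs on the host: once the preceding GPU call has written dist_sq_.mutable_gpu_data(), the first read of dist_sq_.cpu_data() makes Caffe's SyncedMemory copy the squared distances back to the CPU. A one-line sketch of that implicit transfer, for illustration only (not part of the diff):

const Dtype* host_dist_sq = dist_sq_.cpu_data();  // triggers device -> host sync before the loop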

template <typename Dtype>
__global__ void CLLForward(const int count, const int channels,
const Dtype margin, const Dtype alpha,
__global__ void CLLBackward(const int count, const int channels,
const Dtype margin, const bool legacy_version, const Dtype alpha,
const Dtype* y, const Dtype* diff, const Dtype* dist_sq,
Dtype *bottom_diff) {
CUDA_KERNEL_LOOP(i, count) {
int n = i / channels; // the num index, to access y and dist_sq
if (static_cast<int>(y[n])) { // similar pairs
bottom_diff[i] = alpha * diff[i];
} else { // dissimilar pairs
if ((margin-dist_sq[n]) > 0.0) {
bottom_diff[i] = -alpha * diff[i];
Dtype mdist(0.0);
Dtype beta(0.0);
if (legacy_version) {
mdist = (margin - dist_sq[n]);
beta = -alpha * diff[i];
} else {
Dtype dist = sqrt(dist_sq[n]);
mdist = (margin - dist);
beta = -alpha * mdist / (dist + Dtype(1e-4)) * diff[i];
}
if (mdist > 0.0) {
bottom_diff[i] = beta;
} else {
bottom_diff[i] = 0;
}
@@ -71,12 +89,14 @@ void ContrastiveLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const int count = bottom[0]->count();
const int channels = bottom[0]->channels();
Dtype margin = this->layer_param_.contrastive_loss_param().margin();
const bool legacy_version =
this->layer_param_.contrastive_loss_param().legacy_version();
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] /
static_cast<Dtype>(bottom[0]->num());
// NOLINT_NEXT_LINE(whitespace/operators)
CLLForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, channels, margin, alpha,
CLLBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, channels, margin, legacy_version, alpha,
bottom[2]->gpu_data(), // pair similarity 0 or 1
diff_.gpu_data(), // the cached eltwise difference between a and b
dist_sq_.gpu_data(), // the cached square distance between a and b
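On the kernel's indexing: CUDA_KERNEL_LOOP is Caffe's grid-stride loop macro (its definition from Caffe's CUDA utility header is reproduced here for reference), so each CLLBackward thread strides over the flattened count = num * channels elements, and n = i / channels recovers the pair index used for y and dist_sq:

#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)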
9 changes: 8 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -401,8 +401,15 @@ message ConcatParameter {

// Message that stores parameters used by ContrastiveLossLayer
message ContrastiveLossParameter {
//margin for dissimilar pair
// margin for dissimilar pair
optional float margin = 1 [default = 1.0];
// The first implementation of this cost did not exactly match the cost of
// Hadsell et al. 2006 -- using (margin - d^2) instead of (margin - d)^2.
// legacy_version = false (the default) uses (margin - d)^2 as proposed in the
// Hadsell paper. New models should probably use this version.
// legacy_version = true uses (margin - d^2). This is kept to support /
// reproduce existing models and results.
optional bool legacy_version = 2 [default = false];
}

// Message that stores parameters used by ConvolutionLayer
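Side by side, with d = ||a_n - b_n||_2 and y_n = 1 marking a similar pair, the per-pair contribution to the loss (before the division by 2N in Forward_cpu) is:

\ell_n =
\begin{cases}
d^2 & y_n = 1 \\
\max(\mathrm{margin} - d,\; 0)^2 & y_n = 0,\ \mathtt{legacy\_version} = \mathrm{false} \\
\max(\mathrm{margin} - d^2,\; 0) & y_n = 0,\ \mathtt{legacy\_version} = \mathrm{true}
\end{cases}

The default case is the hinge proposed by Hadsell et al. 2006; the legacy case drops the square on the hinge and squares d instead.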
58 changes: 51 additions & 7 deletions src/caffe/test/test_contrastive_loss_layer.cpp
@@ -22,15 +22,15 @@ class ContrastiveLossLayerTest : public MultiDeviceTest<TypeParam> {

protected:
ContrastiveLossLayerTest()
: blob_bottom_data_i_(new Blob<Dtype>(128, 10, 1, 1)),
blob_bottom_data_j_(new Blob<Dtype>(128, 10, 1, 1)),
blob_bottom_y_(new Blob<Dtype>(128, 1, 1, 1)),
: blob_bottom_data_i_(new Blob<Dtype>(512, 2, 1, 1)),
blob_bottom_data_j_(new Blob<Dtype>(512, 2, 1, 1)),
blob_bottom_y_(new Blob<Dtype>(512, 1, 1, 1)),
blob_top_loss_(new Blob<Dtype>()) {
// fill the values
FillerParameter filler_param;
filler_param.set_mean(0.0);
filler_param.set_std(0.3); // distances~=1.0 to test both sides of margin
GaussianFiller<Dtype> filler(filler_param);
filler_param.set_min(-1.0);
filler_param.set_max(1.0); // distances~=1.0 to test both sides of margin
UniformFiller<Dtype> filler(filler_param);
filler.Fill(this->blob_bottom_data_i_);
blob_bottom_vec_.push_back(blob_bottom_data_i_);
filler.Fill(this->blob_bottom_data_j_);
@@ -79,7 +79,8 @@ TYPED_TEST(ContrastiveLossLayerTest, TestForward) {
if (this->blob_bottom_y_->cpu_data()[i]) { // similar pairs
loss += dist_sq;
} else {
loss += std::max(margin-dist_sq, Dtype(0));
Dtype dist = std::max(margin - sqrt(dist_sq), Dtype(0.0));
loss += dist*dist;
}
}
loss /= static_cast<Dtype>(num) * Dtype(2);
@@ -99,4 +100,47 @@ TYPED_TEST(ContrastiveLossLayerTest, TestGradient) {
this->blob_top_vec_, 1);
}

TYPED_TEST(ContrastiveLossLayerTest, TestForwardLegacy) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
layer_param.mutable_contrastive_loss_param()->set_legacy_version(true);
ContrastiveLossLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
// manually compute to compare
const Dtype margin = layer_param.contrastive_loss_param().margin();
const int num = this->blob_bottom_data_i_->num();
const int channels = this->blob_bottom_data_i_->channels();
Dtype loss(0);
for (int i = 0; i < num; ++i) {
Dtype dist_sq(0);
for (int j = 0; j < channels; ++j) {
Dtype diff = this->blob_bottom_data_i_->cpu_data()[i*channels+j] -
this->blob_bottom_data_j_->cpu_data()[i*channels+j];
dist_sq += diff*diff;
}
if (this->blob_bottom_y_->cpu_data()[i]) { // similar pairs
loss += dist_sq;
} else {
loss += std::max(margin - dist_sq, Dtype(0.0));
}
}
loss /= static_cast<Dtype>(num) * Dtype(2);
EXPECT_NEAR(this->blob_top_loss_->cpu_data()[0], loss, 1e-6);
}

TYPED_TEST(ContrastiveLossLayerTest, TestGradientLegacy) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
layer_param.mutable_contrastive_loss_param()->set_legacy_version(true);
ContrastiveLossLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
// check the gradient for the first two bottom layers
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_, 0);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_, 1);
}

} // namespace caffe