diff --git a/docs/api/python/gluon/loss.md b/docs/api/python/gluon/loss.md
index 3747a0f89bf2..948f4983d370 100644
--- a/docs/api/python/gluon/loss.md
+++ b/docs/api/python/gluon/loss.md
@@ -25,6 +25,7 @@ This package includes several commonly used loss functions in neural networks.
     LogisticLoss
     TripletLoss
     CTCLoss
+    CosineEmbeddingLoss
    PoissonNLLLoss
 ```
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index 5d3ebb7caf5b..da43b62a1c34 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -23,7 +23,7 @@
            'SigmoidBinaryCrossEntropyLoss', 'SigmoidBCELoss',
            'SoftmaxCrossEntropyLoss', 'SoftmaxCELoss', 'KLDivLoss', 'CTCLoss', 'HuberLoss', 'HingeLoss',
-           'SquaredHingeLoss', 'LogisticLoss', 'TripletLoss', 'PoissonNLLLoss']
+           'SquaredHingeLoss', 'LogisticLoss', 'TripletLoss', 'PoissonNLLLoss', 'CosineEmbeddingLoss']
 
 import numpy as np
 from .. import ndarray
@@ -767,3 +767,71 @@ def hybrid_forward(self, F, pred, target, sample_weight=None, epsilon=1e-08):
             loss += stirling_factor
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
         return F.mean(loss)
+
+
+class CosineEmbeddingLoss(Loss):
+    r"""For a target label 1 or -1 and a pair of vectors input1 and input2, the
+    function computes a loss based on the cosine similarity between the vectors,
+    i.e. on how similar or dissimilar the two inputs are.
+
+    .. math::
+
+        L_i = \begin{cases} 1 - cos\_sim({input1}_i, {input2}_i) & \text{ if } {label}_i = 1 \\
+              \max(0, cos\_sim({input1}_i, {input2}_i) - margin) & \text{ if } {label}_i = -1 \end{cases}\\
+        cos\_sim({input1}_i, {input2}_i) = \frac{{input1}_i \cdot {input2}_i}{\lVert {input1}_i \rVert \, \lVert {input2}_i \rVert}
+
+    `input1` and `input2` can have arbitrary shape as long as they have the same
+    number of elements.
+
+    Parameters
+    ----------
+    weight : float or None
+        Global scalar weight for loss.
+    batch_axis : int, default 0
+        The axis that represents mini-batch.
+    margin : float, default 0
+        Cosine-similarity margin below which a dissimilar pair (label -1)
+        incurs no loss.
+
+
+    Inputs:
+        - **input1**: a tensor with arbitrary shape
+        - **input2**: another tensor with the same number of elements as input1,
+          to which input1 is compared for similarity and loss calculation
+        - **label**: a 1-D tensor of 1s and -1s indicating, for each pair, whether
+          input1 and input2 should be similar (1) or dissimilar (-1)
+        - **sample_weight**: element-wise weighting tensor. Must be broadcastable
+          to the same shape as input1. For example, if input1 has shape (64, 10)
+          and you want to weigh each sample in the batch separately,
+          sample_weight should have shape (64, 1).
+
+    Outputs:
+        - **loss**: The loss tensor with shape (batch_size, 1).
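+
+    Example (an illustrative sketch; the shapes are arbitrary, and ``mx`` /
+    ``gluon`` are assumed to be imported from ``mxnet``)::
+
+        >>> input1 = mx.nd.random.randn(3, 2)
+        >>> input2 = mx.nd.random.randn(3, 2)
+        >>> label = mx.nd.array([1, -1, 1])
+        >>> loss_fn = gluon.loss.CosineEmbeddingLoss()
+        >>> loss = loss_fn(input1, input2, label)  # shape (3, 1): one value per pair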
+ """ + def __init__(self, weight=None, batch_axis=0, margin=0, **kwargs): + super(CosineEmbeddingLoss, self).__init__(weight, batch_axis, **kwargs) + self._margin = margin + + def hybrid_forward(self, F, input1, input2, label, sample_weight=None): + input1 = _reshape_like(F, input1, input2) + label = label.reshape((-1, 1)) + cos_sim = self._cosine_similarity(F, input1, input2) + y_1 = label == 1 + y_minus_1 = label == -1 + cos_sim_a = (1 - cos_sim) * y_1 + + if F is ndarray: + z_array = F.array([0]) + else: + z_array = F.zeros((1, 1)) + cos_sim_b = F.broadcast_maximum(z_array, y_minus_1 * (cos_sim - self._margin), axis=1) + loss = cos_sim_a + cos_sim_b + loss = _apply_weighting(F, loss, self._weight, sample_weight) + return loss + + def _cosine_similarity(self, F, x, y, axis=-1): + # Calculates the cosine similarity between 2 vectors + x_norm = F.norm(x, axis=axis).reshape(-1, 1) + y_norm = F.norm(y, axis=axis).reshape(-1, 1) + x_dot_y = F.sum(x*y, axis=axis).reshape(-1, 1) + if F is ndarray: + eps_arr = F.array([1e-12]) + else: + eps_arr = F.full((1, 1), 1e-12) + return (x_dot_y / F.broadcast_maximum(x_norm * y_norm, eps_arr)) diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py index 2b062fba5ec0..18d1ebf8fb11 100644 --- a/tests/python/unittest/test_loss.py +++ b/tests/python/unittest/test_loss.py @@ -349,6 +349,23 @@ def test_triplet_loss(): assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() +def test_cosine_loss(): + #Generating samples + input1 = mx.nd.random.randn(3, 2) + input2 = mx.nd.random.randn(3, 2) + label = mx.nd.sign(mx.nd.random.randn(input1.shape[0])) + #Calculating loss from cosine embedding loss function in Gluon + Loss = gluon.loss.CosineEmbeddingLoss() + loss = Loss(input1, input2, label) + + # Calculating the loss Numpy way + numerator = mx.nd.sum(input1 * input2, keepdims=True, axis=1) + denominator = mx.nd.sqrt(mx.nd.sum(input1**2, axis=1, keepdims=True)) \ + * mx.nd.sqrt(mx.nd.sum(input2**2, axis=1, keepdims=True)) + numpy_loss = mx.nd.where(label == 1, 1-numerator/denominator, \ + mx.nd.broadcast_maximum(mx.nd.array([0]), numerator/denominator, axis=1)) + assert_almost_equal(loss.asnumpy(), numpy_loss.asnumpy(), rtol=1e-3, atol=1e-5) + def test_poisson_nllloss(): pred = mx.nd.random.normal(shape=(3, 4)) min_pred = mx.nd.min(pred) @@ -404,6 +421,7 @@ def test_poisson_nllloss_mod(): optimizer='adam') assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + if __name__ == '__main__': import nose nose.runmodule()