diff --git a/python/tvm/relay/op/_tensor_grad.py b/python/tvm/relay/op/_tensor_grad.py
index fe22f45fca9c..3a82e46e6a7d 100644
--- a/python/tvm/relay/op/_tensor_grad.py
+++ b/python/tvm/relay/op/_tensor_grad.py
@@ -305,6 +305,15 @@ def softmax_grad(orig, grad):
     return [(grad - _sum(grad * orig, orig.attrs.axis, True)) * orig]


+@register_gradient("nn.log_softmax")
+def log_softmax_grad(orig, grad):
+    """Gradient of log_softmax"""
+    x = orig.args[0]
+    sm = _nn.softmax(x, axis=orig.attrs.axis)
+    grad = grad / sm
+    return softmax_grad(sm, grad)
+
+
 @register_gradient("nn.bias_add")
 def bias_add_grad(orig, grad):
     """Returns gradient of bias_add"""
diff --git a/tests/python/relay/test_op_grad_level1.py b/tests/python/relay/test_op_grad_level1.py
index ba3526579c58..114bda0eccd5 100644
--- a/tests/python/relay/test_op_grad_level1.py
+++ b/tests/python/relay/test_op_grad_level1.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import numpy as np
+import pytest

 import tvm
 from tvm import relay
@@ -100,7 +101,13 @@ def check_binary_op(opfunc, ref):
 def test_softmax_grad():
     data = relay.var("data", relay.TensorType((1, 16), "float64"))
     fwd_func = relay.Function([data], relay.nn.softmax(data))
-    check_grad(fwd_func)
+    check_grad(fwd_func, scale=1)
+
+
+def test_log_softmax_grad():
+    data = relay.var("data", relay.TensorType((2, 16), "float64"))
+    fwd_func = relay.Function([data], relay.nn.log_softmax(data))
+    check_grad(fwd_func, scale=1)


 def test_bias_add_grad():
@@ -111,6 +118,4 @@ def test_bias_add_grad():


 if __name__ == "__main__":
-    test_unary_op()
-    test_binary_op()
-    test_bias_add_grad()
+    pytest.main([__file__])
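
A note on why the new gradient is correct (not part of the patch, just a standalone NumPy sketch): log_softmax is log(softmax(x)), so by the chain rule its vector-Jacobian product is the upstream gradient divided by softmax(x) and then pushed through the softmax VJP, which is what log_softmax_grad does above. The check below assumes the same softmax VJP formula used by softmax_grad, (grad - sum(grad * orig)) * orig, restricted to a 1-D vector for simplicity.

import numpy as np

def softmax(x):
    # numerically stable softmax over a 1-D vector
    e = np.exp(x - x.max())
    return e / e.sum()

def softmax_vjp(s, g):
    # mirrors softmax_grad in the patch: (grad - sum(grad * orig)) * orig
    return (g - (g * s).sum()) * s

rng = np.random.default_rng(0)
x = rng.normal(size=16)
g = rng.normal(size=16)   # upstream gradient
s = softmax(x)

# what log_softmax_grad computes: feed g / s through the softmax VJP ...
lhs = softmax_vjp(s, g / s)
# ... versus the analytic log_softmax gradient, g - s * sum(g)
rhs = g - s * g.sum()
assert np.allclose(lhs, rhs)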