diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index eee5ea67f6e1..b09b332765f8 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -367,6 +367,8 @@ static void ExpandDimEx(const nnvm::NodeAttrs& attrs,
                         const std::vector<NDArray>& outputs) {
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
+  // skip zero-size tensor
+  if (inputs[0].shape().Size() == 0U) return;
   // If inputs are supposed to be in MKLDNN format and
   // MKLDNN support the data type or the shape. Then convert
   // it to the output format and shape
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index b586d3c3f9c3..643b9c1e9ba0 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -4166,6 +4166,60 @@ def g(data):
         assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol)
 
 
+@with_seed()
+@use_np
+def test_np_expand_dims():
+    class TestExpandDims(HybridBlock):
+        def __init__(self, axis):
+            super(TestExpandDims, self).__init__()
+            self._axis = axis
+
+        def hybrid_forward(self, F, x):
+            return F.np.expand_dims(x, self._axis)
+
+    dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64, np.bool]
+    shapes = [
+        (),
+        (0,),
+        (0, 1),
+        (3,),
+        (1, 2, 3),
+    ]
+    flags = [True, False]
+    for dtype, shape, hybridize in itertools.product(dtypes, shapes, flags):
+        ndim = len(shape)
+        for axis in range(-ndim-1, ndim+1):
+            x_np = _np.random.uniform(0, 100, size=shape).astype(dtype)
+            expected = _np.expand_dims(x_np, axis)
+            for req in ['write', 'add']:
+                test_expand_dims = TestExpandDims(axis)
+                if hybridize:
+                    test_expand_dims.hybridize()
+
+                x = np.array(x_np)
+                x.attach_grad(req)
+                initial_grad = np.random.uniform(0, 10, size=x.shape).astype(x.dtype)
+                x.grad[()] = initial_grad
+                with mx.autograd.record():
+                    y = test_expand_dims(x)
+                y.backward()
+
+                assert_almost_equal(y.asnumpy(), expected, use_broadcast=False)
+                if req == 'null':
+                    assert same(x.grad.asnumpy(), initial_grad.asnumpy())
+                elif req == 'write':
+                    assert same(x.grad.asnumpy(), _np.ones_like(x.asnumpy()))
+                else:
+                    assert_almost_equal(x.grad.asnumpy(), initial_grad.asnumpy() + _np.ones_like(initial_grad.asnumpy()),
+                                        atol=1e-2 if dtype is np.float16 else 1e-4,
+                                        rtol=1e-2 if dtype is np.float16 else 1e-4,
+                                        use_broadcast=False)
+
+            # check imperative again
+            y = np.expand_dims(x, axis)
+            assert_almost_equal(y.asnumpy(), expected, use_broadcast=False)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
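
For reference, a minimal usage sketch of the behavior this patch addresses. Assumptions not stated in the diff itself: an MXNet build that includes this change, with the numpy-compatible interface enabled via mxnet.npx.set_np(); on MKLDNN-enabled CPU builds, np.expand_dims dispatches to ExpandDimEx, where the new guard returns early for a zero-size input instead of continuing into the MKLDNN format/shape conversion path below it:

    import mxnet as mx
    from mxnet import np, npx

    npx.set_np()  # enable numpy-compatible array semantics

    x = np.ones((0, 1))            # zero-size tensor: shape (0, 1), so Size() == 0
    y = np.expand_dims(x, axis=0)  # with the guard, the operator returns early
    print(y.shape)                 # expected: (1, 0, 1)

The test above exercises the same path across empty shapes such as (0,) and (0, 1), all supported dtypes, both hybridized and imperative execution, and 'write'/'add' gradient requests.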