diff --git a/source/lib/src/gelu.cc b/source/lib/src/gelu.cc index c554da0578..e86faa882b 100644 --- a/source/lib/src/gelu.cc +++ b/source/lib/src/gelu.cc @@ -37,7 +37,7 @@ void deepmd::gelu_grad_grad_cpu( for (int ii = 0; ii < size; ii++) { const FPTYPE var1 = tanh(SQRT_2_PI * (xx[ii] + 0.044715 * xx[ii] * xx[ii] *xx[ii])); const FPTYPE var2 = SQRT_2_PI * (1 - var1 * var1) * (0.134145 * xx[ii] * xx[ii] + 1); - out[ii] = dy[ii] * dy_2[ii] * (0.134145 * SQRT_2_PI * xx[ii] * xx[ii] * (1 - var1 * var1) - SQRT_2_PI * xx[ii] * var2 * (0.134145 * xx[ii] * xx[ii] + 1) * var1 + var2); + out[ii] = dy[ii] * dy_2[ii] * (0.134145 * SQRT_2_PI * xx[ii] * xx[ii] * (1 - var1 * var1) - SQRT_2_PI * xx[ii] * var2 * (0.134145 * xx[ii] * xx[ii] + 1) * var1 + var2); } } diff --git a/source/op/gelu_multi_device.cc b/source/op/gelu_multi_device.cc index dc86ab6c8d..af49c4ac7e 100644 --- a/source/op/gelu_multi_device.cc +++ b/source/op/gelu_multi_device.cc @@ -144,6 +144,10 @@ class GeluGradGradOp : public OpKernel { context_output_index++, x_tensor.shape(), &output_tensor)); + DeviceFunctor() ( + device, + context->eigen_device() + ); // flat the tensors FPTYPE * out = output_tensor->flat().data(); const FPTYPE * x = x_tensor.flat().data();