This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

fix possible NaN in GELU backward
haojin2 committed Apr 24, 2019
1 parent 3b48289 · commit ad60da9
Showing 1 changed file with 5 additions and 3 deletions.
src/operator/mshadow_op.h
```diff
@@ -45,10 +45,12 @@ namespace mshadow_op {
 __constant__ const float PI = 3.14159265358979323846;
 __constant__ const float SELU_ALPHA = 1.6732632423543772848170429916717;
 __constant__ const float SELU_LAMBDA = 1.0507009873554804934193349852946;
+__constant__ const float SQRT_2 = 1.4142135623730950488016887242096;
 #else
 const float PI = 3.14159265358979323846;
 const float SELU_ALPHA = 1.6732632423543772848170429916717;
 const float SELU_LAMBDA = 1.0507009873554804934193349852946;
+const float SQRT_2 = 1.4142135623730950488016887242096;
 using std::isnan;
 #endif
 using std::enable_if;
@@ -173,11 +175,11 @@ MXNET_UNARY_MATH_OP(erf_grad, 2.0 / math::sqrt(PI) * math::exp(-(a * a)));
 MXNET_SIMPLE_UNARY_MATH_OP(erf);
 
 MXNET_UNARY_MATH_OP(gelu,
-                    DType(0.5f * float(a) * (1.0f + math::erf(float(a) / math::sqrt(2.0f)))));
+                    DType(0.5f * static_cast<float>(a) * (1.0f + math::erf(static_cast<float>(a) / SQRT_2))));
 
 MXNET_BINARY_MATH_OP_NC(gelu_grad,
-                        DType(float(b) / float(a) +
-                              0.5f * float(a) * erf_grad::Map(float(a) / math::sqrt(2.0f)) / math::sqrt(2.0f)));
+                        DType(0.5f * (1.0f + math::erf(static_cast<float>(a) / SQRT_2) +
+                                      static_cast<float>(a) * erf_grad::Map(static_cast<float>(a) / SQRT_2) / SQRT_2)));
 
 MXNET_SIMPLE_UNARY_MATH_OP(exp);
 
```
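Why the old backward could produce NaN: gelu_grad receives the input a and the forward output b = gelu(a) = 0.5 · a · (1 + erf(a/√2)), and the old expression recovered the first term of the derivative as b / a. At a == 0 that quotient is 0/0, i.e. NaN, even though the true derivative there is 0.5 · (1 + erf(0)) = 0.5. The fix substitutes the closed form b / a = 0.5 · (1 + erf(a/√2)), which is defined everywhere, giving the factored result 0.5 · (1 + erf(a/√2) + a · erf'(a/√2)/√2) that the new code computes.

Below is a minimal standalone sketch contrasting the two formulas at a == 0. It is plain C++ with <cmath>, not the MXNet macros or DType machinery, and the helper names gelu, erf_grad, gelu_grad_old, and gelu_grad_new are illustrative, not from the codebase:

```cpp
// Minimal sketch (not the MXNet code): why the old GELU backward NaNs at a == 0.
#include <cmath>
#include <cstdio>

const float PI     = 3.14159265358979323846f;
const float SQRT_2 = 1.4142135623730950488016887242096f;

// Forward: gelu(a) = 0.5 * a * (1 + erf(a / sqrt(2)))
float gelu(float a) {
  return 0.5f * a * (1.0f + std::erf(a / SQRT_2));
}

// d/dx erf(x) = 2 / sqrt(pi) * exp(-x^2), matching mshadow_op's erf_grad
float erf_grad(float x) {
  return 2.0f / std::sqrt(PI) * std::exp(-x * x);
}

// Old backward: divides the forward output b by the input a,
// so a == 0 yields 0/0 == NaN.
float gelu_grad_old(float a, float b) {
  return b / a + 0.5f * a * erf_grad(a / SQRT_2) / SQRT_2;
}

// New backward: b/a replaced by its closed form 0.5 * (1 + erf(a / sqrt(2))),
// which is well defined at a == 0.
float gelu_grad_new(float a) {
  return 0.5f * (1.0f + std::erf(a / SQRT_2) + a * erf_grad(a / SQRT_2) / SQRT_2);
}

int main() {
  float a = 0.0f;
  float b = gelu(a);                              // b == 0
  std::printf("old: %f\n", gelu_grad_old(a, b));  // prints nan
  std::printf("new: %f\n", gelu_grad_new(a));     // prints 0.500000
  return 0;
}
```

Compiled with any C++11 compiler, this prints nan for the old formula and 0.500000 for the new one; away from zero the two agree.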
