This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit
Fix layer_norm_eps in BERTEncoder (#1215)
leezu committed May 1, 2020
1 parent b67b9a4 commit 425f799
Showing 1 changed file with 3 additions and 2 deletions.
src/gluonnlp/model/bert.py: 5 changes (3 additions, 2 deletions)
@@ -318,11 +318,12 @@ def __init__(self, *, num_layers=2, units=512, hidden_size=2048,
         self._output_attention = output_attention
         self._output_all_encodings = output_all_encodings
         self._dropout = dropout
+        self._layer_norm_eps = layer_norm_eps
 
         with self.name_scope():
             if dropout:
                 self.dropout_layer = nn.Dropout(rate=dropout)
-            self.layer_norm = nn.LayerNorm(in_channels=units, epsilon=1e-12)
+            self.layer_norm = nn.LayerNorm(in_channels=units, epsilon=self._layer_norm_eps)
             self.position_weight = self.params.get('position_weight', shape=(max_length, units),
                                                    init=weight_initializer)
             self.transformer_cells = nn.HybridSequential()
@@ -550,7 +551,7 @@ def _get_decoder(self, units, vocab_size, embed, prefix):
         decoder = nn.HybridSequential(prefix=prefix)
         decoder.add(nn.Dense(units, flatten=False))
         decoder.add(GELU())
-        decoder.add(nn.LayerNorm(in_channels=units, epsilon=1e-12))
+        decoder.add(nn.LayerNorm(in_channels=units, epsilon=self.encoder._layer_norm_eps))
         decoder.add(nn.Dense(vocab_size, flatten=False, params=embed.collect_params()))
         assert decoder[3].weight == list(embed.collect_params().values())[0], \
             'The weights of word embedding are not tied with those of decoder'
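Net effect of the two hunks: a layer_norm_eps value passed to BERTEncoder now reaches the encoder's own nn.LayerNorm, and the masked-LM decoder built by _get_decoder reuses the same value through self.encoder._layer_norm_eps, instead of both being hard-coded to 1e-12. A minimal sketch of how this surfaces to a caller, assuming GluonNLP 0.9.x with MXNet installed; the constructor arguments below are illustrative and not taken from the commit:

# Construct an encoder with a non-default epsilon (keyword-only arguments,
# per the signature shown in the first hunk header).
from gluonnlp.model.bert import BERTEncoder

encoder = BERTEncoder(num_layers=2, units=512, hidden_size=2048,
                      num_heads=4, layer_norm_eps=1e-5)

# The fixed __init__ stores the value and forwards it to nn.LayerNorm.
assert encoder._layer_norm_eps == 1e-5
print(encoder.layer_norm)  # repr should now report the custom epsilon rather than 1e-12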
