Skip to content

Commit

Permalink
Adjust stochastic depth dropout probability calculation (NVIDIA#6120)
Browse files Browse the repository at this point in the history
Signed-off-by: Ante Jukić <[email protected]>
Signed-off-by: hsiehjackson <[email protected]>
  • Loading branch information
anteju authored and hsiehjackson committed Jun 2, 2023
1 parent 1b835ba commit c648c90
Show file tree
Hide file tree
Showing 12 changed files with 34 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/source/asr/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ use it, specify the following parameters in the encoder config file to reproduce
# ...
stochastic_depth_drop_prob: 0.3
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1
See :ref:`documentation of ConformerEncoder <conformer-encoder-api>` for more details. Note that stochastic depth
is supported for both CTC and Transducer model variations (or any other kind of model/loss that's using
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.ConvASRDecoder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
Expand Down
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_ctc_bpe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.ConvASRDecoder
Expand Down
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_ctc_char.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.ConvASRDecoder
Expand Down
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_transducer_bpe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
Expand Down
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_transducer_char.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.ConvASRDecoder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ model:
# set to non-zero to enable stochastic depth
stochastic_depth_drop_prob: 0.0
stochastic_depth_mode: linear # linear or uniform
stochastic_depth_start_layer: 0
stochastic_depth_start_layer: 1

decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/asr/modules/conformer_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin):
stochastic_depth_start_layer (int): starting layer for stochastic depth.
All layers before this will never be dropped. Note that drop
probability will be adjusted accordingly if mode is "linear" when
start layer is > 0. Defaults to 0.
start layer is > 1. Defaults to 1.
"""

def input_example(self, max_batch=1, max_dim=256):
Expand Down Expand Up @@ -207,7 +207,7 @@ def __init__(
dropout_att=0.0,
stochastic_depth_drop_prob: float = 0.0,
stochastic_depth_mode: str = "linear",
stochastic_depth_start_layer: int = 0,
stochastic_depth_start_layer: int = 1,
):
super().__init__()
d_ff = d_model * ff_expansion_factor
Expand Down
33 changes: 20 additions & 13 deletions nemo/collections/asr/parts/utils/regularization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ def compute_stochastic_depth_drop_probs(
num_layers: int,
stochastic_depth_drop_prob: float = 0.0,
stochastic_depth_mode: str = "linear",
stochastic_depth_start_layer: int = 0,
stochastic_depth_start_layer: int = 1,
) -> List[float]:
"""Computes drop probabilities for stochastic depth regularization technique.
The first layer is never dropped, so the starting layer needs to be greater
than or equal to 1.
Args:
num_layers (int): number of layers in the network.
Expand All @@ -36,22 +38,27 @@ def compute_stochastic_depth_drop_probs(
stochastic_depth_start_layer (int): starting layer for stochastic depth.
All layers before this will never be dropped. Note that drop
probability will be adjusted accordingly if mode is "linear" when
start layer is > 0. Defaults to 0.
start layer is > 1. Defaults to 1.
Returns:
List[float]: list of drop probabilities for all layers
"""
if not (0 <= stochastic_depth_drop_prob < 1.0):
raise ValueError("stochastic_depth_drop_prob has to be in [0, 1).")
if not (0 <= stochastic_depth_start_layer <= num_layers):
raise ValueError("stochastic_depth_start_layer has to be in [0, num layers].")
L = num_layers - stochastic_depth_start_layer
if not (1 <= stochastic_depth_start_layer <= num_layers):
raise ValueError("stochastic_depth_start_layer has to be in [1, num layers].")

# Layers before `stochastic_depth_start_layer` are never dropped
layer_drop_probs = [0.0] * stochastic_depth_start_layer
if stochastic_depth_mode == "linear":
# we are dividing by L - 1 to ensure we start from 0 probability
# (never drop the first layer) and end with desired drop probability.
layer_drop_probs += [l / (L - 1) * stochastic_depth_drop_prob for l in range(L)]
elif stochastic_depth_mode == "uniform":
layer_drop_probs += [stochastic_depth_drop_prob] * L
else:
raise ValueError('stochastic_depth_mode has to be one of ["linear", "uniform"].')

# Layers starting with `stochastic_depth_start_layer` may be dropped
if (L := num_layers - stochastic_depth_start_layer) > 0:
if stochastic_depth_mode == "linear":
# we start with 1/L * drop_prob and end with the desired drop probability.
layer_drop_probs += [l / L * stochastic_depth_drop_prob for l in range(1, L + 1)]
elif stochastic_depth_mode == "uniform":
layer_drop_probs += [stochastic_depth_drop_prob] * L
else:
raise ValueError(
f'stochastic_depth_mode has to be one of ["linear", "uniform"]. Current value: {stochastic_depth_mode}'
)
return layer_drop_probs
6 changes: 3 additions & 3 deletions tests/collections/asr/test_conformer_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_stochastic_depth_model_creation(self):

# linear mode
for drop_prob in [0.3, 0.5, 0.9]:
for start_layer in [0, 2]:
for start_layer in [1, 3]:
model = ConformerEncoder(
feat_in=10,
n_layers=n_layers,
Expand All @@ -40,7 +40,7 @@ def test_stochastic_depth_model_creation(self):
stochastic_depth_start_layer=start_layer,
)
L = n_layers - start_layer
assert model.layer_drop_probs == [0.0] * start_layer + [drop_prob * l / (L - 1) for l in range(L)]
assert model.layer_drop_probs == [0.0] * start_layer + [drop_prob * l / L for l in range(1, L + 1)]

# uniform mode
for drop_prob in [0.3, 0.5, 0.9]:
Expand Down Expand Up @@ -71,7 +71,7 @@ def test_stochastic_depth_model_creation(self):
with pytest.raises(ValueError, match="stochastic_depth_mode has to be one of"):
ConformerEncoder(feat_in=10, n_layers=n_layers, d_model=4, feat_out=8, stochastic_depth_mode="weird")

for start_layer in [-1, 5]:
for start_layer in [-1, 0, 5]:
with pytest.raises(ValueError, match="stochastic_depth_start_layer has to be in"):
ConformerEncoder(
feat_in=10, n_layers=n_layers, d_model=4, feat_out=8, stochastic_depth_start_layer=start_layer,
Expand Down

0 comments on commit c648c90

Please sign in to comment.