diff --git a/wenet/cif/cif_decoder.py b/wenet/cif/cif_decoder.py
index 0193406a2..919e0cc00 100644
--- a/wenet/cif/cif_decoder.py
+++ b/wenet/cif/cif_decoder.py
@@ -48,11 +48,6 @@ class BaseDecoder(nn.Module):
         use_output_layer: whether to use output layer
         pos_enc_class: PositionalEncoding or ScaledPositionalEncoding
         normalize_before: whether to use layer_norm before the first block
-        concat_after: whether to concat attention layer's input and output
-            if True, additional linear will be applied.
-            i.e. x -> x + linear(concat(x, att(x)))
-            if False, no additional linear will be applied.
-            i.e. x -> x + att(x)
     """
 
     def __init__(
@@ -177,7 +172,6 @@ def __init__(
         use_output_layer: bool = True,
         pos_enc_class=PositionalEncoding,
         normalize_before: bool = True,
-        concat_after: bool = False,
         embeds_id: int = -1,
     ):
         assert check_argument_types()
@@ -205,8 +199,7 @@ def __init__(
                 PositionwiseFeedForward(attention_dim, linear_units,
                                         dropout_rate),
                 dropout_rate,
-                normalize_before,
-                concat_after)
+                normalize_before)
             for _ in range(num_blocks)
         ])
 
@@ -294,7 +287,6 @@ def __init__(
         use_output_layer: bool = True,
         pos_enc_class=PositionalEncoding,
         normalize_before: bool = True,
-        concat_after: bool = False,
         att_layer_num: int = 6,
         kernel_size: int = 21,
         sanm_shfit: int = 0
@@ -355,7 +347,6 @@ def __init__(
                                         dropout_rate),
                 dropout_rate,
                 normalize_before,
-                concat_after,
             ) for _ in range(att_layer_num)
         ])
         if num_blocks - att_layer_num <= 0:
@@ -374,7 +365,6 @@ def __init__(
                                         dropout_rate),
                 dropout_rate,
                 normalize_before,
-                concat_after,
             ) for _ in range(num_blocks - att_layer_num)
         ])
         self.decoders3 = torch.nn.ModuleList([
@@ -386,7 +376,6 @@ def __init__(
                                         dropout_rate),
                 dropout_rate,
                 normalize_before,
-                concat_after,
             ) for _ in range(1)
         ])
 
diff --git a/wenet/cif/decoder_layer.py b/wenet/cif/decoder_layer.py
index deab28e8a..2e5c60f39 100644
--- a/wenet/cif/decoder_layer.py
+++ b/wenet/cif/decoder_layer.py
@@ -34,11 +34,6 @@ class DecoderLayerSANM(nn.Module):
         dropout_rate (float): Dropout rate.
         normalize_before (bool):
             Whether to use layer_norm before the first block.
-        concat_after (bool): Whether to concat attention layer's input and
-            output.
-            if True, additional linear will be applied.
-            i.e. x -> x + linear(concat(x, att(x)))
-            if False, no additional linear will be applied. i.e. x -> x + att(x)
     """
 
     def __init__(
@@ -49,7 +44,6 @@ def __init__(
         feed_forward: nn.Module,
         dropout_rate: float,
         normalize_before: bool = True,
-        concat_after: bool = False,
     ):
         """Construct an DecoderLayer object."""
         super(DecoderLayerSANM, self).__init__()
@@ -64,13 +58,6 @@ def __init__(
         self.norm3 = nn.LayerNorm(size, eps=1e-12)
         self.dropout = nn.Dropout(dropout_rate)
         self.normalize_before = normalize_before
-        self.concat_after = concat_after
-        if self.concat_after:
-            self.concat_linear1 = nn.Linear(size + size, size)
-            self.concat_linear2 = nn.Linear(size + size, size)
-        else:
-            self.concat_linear1 = nn.Identity()
-            self.concat_linear2 = nn.Identity()
 
     def forward(
         self,
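
For reference, the behaviour this diff removes is the concat_after residual variant described in the deleted docstrings: with the flag set, a layer computes x + linear(concat(x, att(x))); with it unset, only the plain residual x + att(x) remains. The sketch below is a minimal, self-contained illustration of those two paths, not the actual wenet DecoderLayerSANM; the module name ResidualSelfAttentionBlock and the use of torch.nn.MultiheadAttention as the attention sub-module are assumptions made purely for demonstration.

import torch
from torch import nn


class ResidualSelfAttentionBlock(nn.Module):
    """Illustrative block contrasting the two residual paths that the
    removed concat_after flag used to select between (hypothetical module,
    not part of wenet)."""

    def __init__(self, size: int, num_heads: int = 4,
                 concat_after: bool = False):
        super().__init__()
        self.att = nn.MultiheadAttention(size, num_heads, batch_first=True)
        self.concat_after = concat_after
        # The extra projection is only needed for the (now removed)
        # concat_after=True path.
        self.concat_linear = (nn.Linear(size + size, size)
                              if concat_after else nn.Identity())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        att_out, _ = self.att(x, x, x, need_weights=False)
        if self.concat_after:
            # Removed path: x -> x + linear(concat(x, att(x)))
            return x + self.concat_linear(torch.cat((x, att_out), dim=-1))
        # Remaining path: x -> x + att(x)
        return x + att_out


if __name__ == "__main__":
    x = torch.randn(2, 10, 64)  # (batch, time, size)
    block = ResidualSelfAttentionBlock(64, concat_after=False)
    print(block(x).shape)  # torch.Size([2, 10, 64])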