refactor(model): keep name pattern of class mapping (#2175)
xingchensong authored Nov 28, 2023
1 parent 4c4878e commit 0df2759
Showing 9 changed files with 40 additions and 50 deletions.
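
In short, this commit replaces the get_activation / get_rnn factory helpers with module-level class-mapping dictionaries (WENET_ACTIVATION_CLASSES, WENET_RNN_CLASSES), matching the naming pattern already used by WENET_ATTENTION_CLASSES, WENET_EMB_CLASSES and WENET_SUBSAMPLE_CLASSES. A minimal sketch of the call-site change, using names taken from the diffs below (the "swish" key is only an illustrative choice):

    # Before: the helper returned an activation *instance*.
    activation = get_activation("swish")

    # After: look up the class in the mapping, then instantiate it explicitly.
    activation = WENET_ACTIVATION_CLASSES["swish"]()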
4 changes: 2 additions & 2 deletions wenet/branchformer/cgmlp.py
@@ -25,7 +25,7 @@
 from typing import Tuple
 import torch
 import torch.nn as nn
-from wenet.utils.class_utils import get_activation
+from wenet.utils.class_utils import WENET_ACTIVATION_CLASSES


 class ConvolutionalSpatialGatingUnit(torch.nn.Module):
@@ -73,7 +73,7 @@ def __init__(
         if gate_activation == "identity":
             self.act = torch.nn.Identity()
         else:
-            self.act = get_activation(gate_activation)
+            self.act = WENET_ACTIVATION_CLASSES[gate_activation]()

         self.dropout = torch.nn.Dropout(dropout_rate)

4 changes: 2 additions & 2 deletions wenet/e_branchformer/encoder.py
@@ -28,7 +28,7 @@
 from wenet.utils.mask import add_optional_chunk_mask
 from wenet.utils.class_utils import (
     WENET_ATTENTION_CLASSES, WENET_EMB_CLASSES, WENET_SUBSAMPLE_CLASSES,
-    get_activation,
+    WENET_ACTIVATION_CLASSES,
 )

 class EBranchformerEncoder(nn.Module):
@@ -65,7 +65,7 @@ def __init__(
         macaron_style: bool = True,
     ):
         super().__init__()
-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()
         self._output_size = output_size

         self.embed = WENET_SUBSAMPLE_CLASSES[input_layer](
4 changes: 2 additions & 2 deletions wenet/efficient_conformer/encoder.py
@@ -33,7 +33,7 @@
 from wenet.utils.mask import add_optional_chunk_mask
 from wenet.utils.class_utils import (
     WENET_ATTENTION_CLASSES, WENET_EMB_CLASSES, WENET_SUBSAMPLE_CLASSES,
-    get_activation,
+    WENET_ACTIVATION_CLASSES,
 )

 class EfficientConformerEncoder(torch.nn.Module):
@@ -104,7 +104,7 @@ def __init__(
         self.use_dynamic_chunk = use_dynamic_chunk
         self.use_dynamic_left_chunk = use_dynamic_left_chunk

-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()
         self.num_blocks = num_blocks
         self.attention_heads = attention_heads
         self.cnn_module_kernel = cnn_module_kernel
4 changes: 2 additions & 2 deletions wenet/squeezeformer/encoder.py
@@ -29,7 +29,7 @@
     import PositionwiseFeedForward
 from wenet.squeezeformer.convolution import ConvolutionModule
 from wenet.utils.mask import make_pad_mask, add_optional_chunk_mask
-from wenet.utils.class_utils import get_activation
+from wenet.utils.class_utils import WENET_ACTIVATION_CLASSES


 class SqueezeformerEncoder(nn.Module):
@@ -114,7 +114,7 @@ def __init__(
         self.use_dynamic_chunk = use_dynamic_chunk
         self.use_dynamic_left_chunk = use_dynamic_left_chunk
         self.pos_enc_layer_type = pos_enc_layer_type
-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()

         # self-attention module definition
         if pos_enc_layer_type != "rel_pos":
7 changes: 4 additions & 3 deletions wenet/transducer/joint.py
@@ -2,7 +2,7 @@

 import torch
 from torch import nn
-from wenet.utils.class_utils import get_activation
+from wenet.utils.class_utils import WENET_ACTIVATION_CLASSES


 class TransducerJoint(torch.nn.Module):
@@ -23,7 +23,7 @@ def __init__(self,
         assert joint_mode in ['add']
         super().__init__()

-        self.activatoin = get_activation(activation)
+        self.activatoin = WENET_ACTIVATION_CLASSES[activation]()
         self.prejoin_linear = prejoin_linear
         self.postjoin_linear = postjoin_linear
         self.joint_mode = joint_mode
@@ -55,7 +55,8 @@ def __init__(self,
             torch.nn.Tanh(), torch.nn.Dropout(dropout_rate),
             torch.nn.Linear(join_dim, 1), torch.nn.LogSigmoid())
         self.token_pred = torch.nn.Sequential(
-            get_activation(hat_activation), torch.nn.Dropout(dropout_rate),
+            WENET_ACTIVATION_CLASSES[hat_activation](),
+            torch.nn.Dropout(dropout_rate),
             torch.nn.Linear(join_dim, self.vocab_size - 1))

     def forward(self,
18 changes: 9 additions & 9 deletions wenet/transducer/predictor.py
@@ -2,7 +2,7 @@

 import torch
 from torch import nn
-from wenet.utils.class_utils import get_activation, get_rnn
+from wenet.utils.class_utils import WENET_ACTIVATION_CLASSES, WENET_RNN_CLASSES


 def ApplyPadding(input, padding, pad_value) -> torch.Tensor:
@@ -79,12 +79,12 @@ def __init__(self,
         # NOTE(Mddct): rnn base from torch not support layer norm
         # will add layer norm and prune value in cell and layer
         # ref: https://github.com/Mddct/neural-lm/blob/main/models/gru_cell.py
-        self.rnn = get_rnn(rnn_type=rnn_type)(input_size=embed_size,
-                                              hidden_size=hidden_size,
-                                              num_layers=num_layers,
-                                              bias=bias,
-                                              batch_first=True,
-                                              dropout=dropout)
+        self.rnn = WENET_RNN_CLASSES[rnn_type](input_size=embed_size,
+                                               hidden_size=hidden_size,
+                                               num_layers=num_layers,
+                                               bias=bias,
+                                               batch_first=True,
+                                               dropout=dropout)
         self.projection = nn.Linear(hidden_size, output_size)

     def output_size(self):
@@ -237,7 +237,7 @@ def __init__(self,
         self.embed_dropout = nn.Dropout(p=embed_dropout)
         self.ffn = nn.Linear(self.embed_size, self.embed_size)
         self.norm = nn.LayerNorm(self.embed_size, eps=layer_norm_epsilon)
-        self.activatoin = get_activation(activation)
+        self.activatoin = WENET_ACTIVATION_CLASSES[activation]()

     def output_size(self):
         return self.embed_size
@@ -398,7 +398,7 @@ def __init__(self,
                              groups=embed_size,
                              bias=bias)
         self.norm = nn.LayerNorm(embed_size, eps=layer_norm_epsilon)
-        self.activatoin = get_activation(activation)
+        self.activatoin = WENET_ACTIVATION_CLASSES[activation]()

     def output_size(self):
         return self.embed_size
4 changes: 2 additions & 2 deletions wenet/transformer/decoder.py
@@ -22,7 +22,7 @@
 from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward
 from wenet.utils.class_utils import (
     WENET_EMB_CLASSES, WENET_ATTENTION_CLASSES,
-    get_activation
+    WENET_ACTIVATION_CLASSES,
 )
 from wenet.utils.mask import (subsequent_mask, make_pad_mask)

@@ -68,7 +68,7 @@ def __init__(
     ):
         super().__init__()
         attention_dim = encoder_output_size
-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()

         self.embed = torch.nn.Sequential(
             torch.nn.Identity() if input_layer == "no_pos" else torch.nn.Embedding(
6 changes: 3 additions & 3 deletions wenet/transformer/encoder.py
@@ -25,7 +25,7 @@
 from wenet.transformer.positionwise_feed_forward import PositionwiseFeedForward
 from wenet.utils.class_utils import (
     WENET_EMB_CLASSES, WENET_SUBSAMPLE_CLASSES, WENET_ATTENTION_CLASSES,
-    get_activation
+    WENET_ACTIVATION_CLASSES,
 )
 from wenet.utils.mask import make_pad_mask
 from wenet.utils.mask import add_optional_chunk_mask
@@ -326,7 +326,7 @@ def __init__(
                          input_layer, pos_enc_layer_type, normalize_before,
                          static_chunk_size, use_dynamic_chunk,
                          global_cmvn, use_dynamic_left_chunk)
-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()
         self.encoders = torch.nn.ModuleList([
             TransformerEncoderLayer(
                 output_size,
@@ -391,7 +391,7 @@ def __init__(
                          input_layer, pos_enc_layer_type, normalize_before,
                          static_chunk_size, use_dynamic_chunk,
                          global_cmvn, use_dynamic_left_chunk)
-        activation = get_activation(activation_type)
+        activation = WENET_ACTIVATION_CLASSES[activation_type]()

         # self-attention module definition
         encoder_selfattn_layer_args = (
39 changes: 14 additions & 25 deletions wenet/utils/class_utils.py
@@ -3,6 +3,7 @@
 # Copyright [2023-11-28] <[email protected], Xingchen Song>
 import torch

+from wenet.transformer.swish import Swish
 from wenet.transformer.subsampling import (
     LinearNoSubsampling, EmbedinigNoSubsampling,
     Conv1dSubsampling2, Conv2dSubsampling4,
@@ -21,32 +22,20 @@
 from wenet.efficient_conformer.attention import GroupedRelPositionMultiHeadedAttention


-def get_activation(act):
-    """Return activation function."""
-    # Lazy load to avoid unused import
-    from wenet.transformer.swish import Swish
-
-    activation_funcs = {
-        "hardtanh": torch.nn.Hardtanh,
-        "tanh": torch.nn.Tanh,
-        "relu": torch.nn.ReLU,
-        "selu": torch.nn.SELU,
-        "swish": getattr(torch.nn, "SiLU", Swish),
-        "gelu": torch.nn.GELU
-    }
-
-    return activation_funcs[act]()
-
-
-def get_rnn(rnn_type: str) -> torch.nn.Module:
-    assert rnn_type in ["rnn", "lstm", "gru"]
-    if rnn_type == "rnn":
-        return torch.nn.RNN
-    elif rnn_type == "lstm":
-        return torch.nn.LSTM
-    else:
-        return torch.nn.GRU
+WENET_ACTIVATION_CLASSES = {
+    "hardtanh": torch.nn.Hardtanh,
+    "tanh": torch.nn.Tanh,
+    "relu": torch.nn.ReLU,
+    "selu": torch.nn.SELU,
+    "swish": getattr(torch.nn, "SiLU", Swish),
+    "gelu": torch.nn.GELU,
+}

+WENET_RNN_CLASSES = {
+    "rnn": torch.nn.RNN,
+    "lstm": torch.nn.LSTM,
+    "gru": torch.nn.GRU,
+}

 WENET_SUBSAMPLE_CLASSES = {
     "linear": LinearNoSubsampling,
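
For reference, a small, self-contained usage sketch of the new mappings; the keys and tensor sizes below are illustrative choices, not taken from the commit:

    import torch

    from wenet.utils.class_utils import WENET_ACTIVATION_CLASSES, WENET_RNN_CLASSES

    # Each mapping stores classes, so callers instantiate explicitly.
    activation = WENET_ACTIVATION_CLASSES["gelu"]()      # -> torch.nn.GELU()
    rnn = WENET_RNN_CLASSES["lstm"](input_size=80,       # -> torch.nn.LSTM(...)
                                    hidden_size=256,
                                    num_layers=2,
                                    batch_first=True)

    x = torch.randn(4, 10, 80)      # (batch, time, feature)
    out, _ = rnn(activation(x))     # activation is a module; LSTM returns (output, state)

Because the dictionaries are built at import time, Swish must now be imported at the top of class_utils.py (hence the added import) instead of being lazily imported inside get_activation.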
