Revert "Add SwiGLU for auto Llama (PaddlePaddle#8038)"

This reverts commit a574900.
heavyrain-lzy · Mar 13, 2024 · 2304195
1 parent e145bfc
commit 2304195
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 26 deletions.
diff --git a/paddlenlp/transformers/llama/modeling_auto.py b/paddlenlp/transformers/llama/modeling_auto.py
@@ -32,16 +32,6 @@
 except ImportError:
     fused_rotary_position_embedding = None
 
-try:
-    from paddle.incubate.nn.functional import swiglu
-except ImportError:
-
-    def swiglu(x, y=None):
-        if y is None:
-            x, y = paddle.chunk(x, chunks=2, axis=-1)
-        return F.silu(x) * y
-
-
 from paddlenlp.transformers.conversion_utils import (
     StateDictNameMapping,
     init_name_mappings,
@@ -238,10 +228,10 @@ def __init__(self, config, ipp: Optional[int] = None):
 
     def forward(self, x):
         if self.fuse_attention_ffn:
-            x = swiglu(self.gate_up_fused_proj(x))
+            gate_out, up_out = paddle.chunk(self.gate_up_fused_proj(x), chunks=2, axis=-1)
+            out = self.down_proj(F.silu(gate_out) * up_out)
         else:
-            x = swiglu(self.gate_proj(x), self.up_proj(x))
-        out = self.down_proj(x)
+            out = self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x))
         return out
 
 

diff --git a/paddlenlp/transformers/llama/modeling_auto_static.py b/paddlenlp/transformers/llama/modeling_auto_static.py
@@ -31,16 +31,6 @@
 except ImportError:
     fused_rotary_position_embedding = None
 
-try:
-    from paddle.incubate.nn.functional import swiglu
-except ImportError:
-
-    def swiglu(x, y=None):
-        if y is None:
-            x, y = paddle.chunk(x, chunks=2, axis=-1)
-        return F.silu(x) * y
-
-
 from paddlenlp.transformers.conversion_utils import (
     StateDictNameMapping,
     init_name_mappings,
@@ -252,10 +242,10 @@ def forward(self, x):
         fleet.auto.shard_tensor(self.down_proj.weight, *get_dist_attr(["mp", None], self.ipp))
 
         if self.fuse_attention_ffn:
-            x = swiglu(self.gate_up_fused_proj(x))
+            gate_out, up_out = paddle.chunk(self.gate_up_fused_proj(x), chunks=2, axis=-1)
+            out = self.down_proj(F.silu(gate_out) * up_out)
         else:
-            x = swiglu(self.gate_proj(x), self.up_proj(x))
-        out = self.down_proj(x)
+            out = self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x))
         return out