
Commit 1be7aef

Commit message: update
1 parent 28cc566 commit 1be7aef

File tree: 2 files changed (+57 lines, −17 lines)


fastdeploy/model_executor/layers/backends/metax/moe/fused_moe_triton_metax_backend.py

Lines changed: 2 additions & 0 deletions
@@ -81,10 +81,12 @@ def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
             up_gate_proj_attrs = {
                 **extra_weight_attrs,
                 "tensor_track": TensorTracker(shape=up_gate_proj_weight_shape, output_dim=True),
+                "SHARD_ID_TO_SHARDED_DIM": {"gate": 1, "down": 0, "up": 1},
             }
             down_proj_attrs = {
                 **extra_weight_attrs,
                 "tensor_track": TensorTracker(shape=down_proj_weight_shape, output_dim=False),
+                "SHARD_ID_TO_SHARDED_DIM": {"gate": 1, "down": 0, "up": 1},
             }
         else:
             up_gate_proj_weight_shape = self.up_gate_proj_weight_shape[::-1]
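Note: "SHARD_ID_TO_SHARDED_DIM" tells the weight loader which axis of each expert projection is split across tensor-parallel ranks ("gate"/"up" slice the fused up_gate_proj, "down" slices down_proj); the Metax backend uses {"gate": 1, "down": 0, "up": 1}, apparently because it stores the fused weights in the opposite orientation to the Triton backend below, which uses {"gate": 0, "down": 1, "up": 0}. A minimal sketch of how such a map can drive slicing; shard_slice and the shapes are hypothetical, not FastDeploy's actual loader code:

import numpy as np

SHARD_ID_TO_SHARDED_DIM = {"gate": 1, "down": 0, "up": 1}  # mapping added by this commit

def shard_slice(full_weight, shard_id, tp_rank, tp_size):
    # Pick the axis this shard is split along, then take this rank's chunk.
    dim = SHARD_ID_TO_SHARDED_DIM[shard_id]
    chunk = full_weight.shape[dim] // tp_size  # assumes an even split
    index = [slice(None)] * full_weight.ndim
    index[dim] = slice(tp_rank * chunk, (tp_rank + 1) * chunk)
    return full_weight[tuple(index)]

w_gate = np.zeros((8, 16))  # [hidden_in, ffn_out], illustrative sizes only
w_down = np.zeros((16, 8))  # [ffn_in, hidden_out], illustrative sizes only
assert shard_slice(w_gate, "gate", tp_rank=0, tp_size=4).shape == (8, 4)  # output axis split
assert shard_slice(w_down, "down", tp_rank=0, tp_size=4).shape == (4, 8)  # input axis split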

fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py

Lines changed: 55 additions & 17 deletions
@@ -1231,24 +1231,55 @@ def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
                 down_proj_attrs,
             )
         else:
-            if self.model_format != "torch" and layer.fd_config.load_config.load_choices == "default_v1":
-                # transpose [0,2,1]
-                up_gate_proj_weight_shape = (
-                    self.up_gate_proj_weight_shape[:1] + self.up_gate_proj_weight_shape[1:][::-1]
-                )
-                up_gate_proj_scale_shape = self.up_gate_proj_scale_shape[:1] + self.up_gate_proj_scale_shape[1:][::-1]
-                down_proj_weight_shape = self.down_proj_weight_shape[:1] + self.down_proj_weight_shape[1:][::-1]
-                down_proj_scale_shape = self.down_proj_scale_shape[:1] + self.down_proj_scale_shape[1:][::-1]
-                extra_weight_attrs = {
-                    **extra_weight_attrs,
-                    "SHARD_ID_TO_SHARDED_DIM": {"gate": 0, "down": 1, "up": 0},
-                }
+            # 1.init shape
+            extra_weight_attrs = {**extra_weight_attrs}
+            if layer.fd_config.load_config.load_choices == "default_v1":
+                if self.model_format != "torch":
+                    # transpose [0,2,1]
+                    up_gate_proj_weight_shape = (
+                        self.up_gate_proj_weight_shape[:1] + self.up_gate_proj_weight_shape[1:][::-1]
+                    )
+                    up_gate_proj_scale_shape = (
+                        self.up_gate_proj_scale_shape[:1] + self.up_gate_proj_scale_shape[1:][::-1]
+                    )
+                    down_proj_weight_shape = self.down_proj_weight_shape[:1] + self.down_proj_weight_shape[1:][::-1]
+                    down_proj_scale_shape = self.down_proj_scale_shape[:1] + self.down_proj_scale_shape[1:][::-1]
+                    up_gate_proj_attrs = {
+                        **extra_weight_attrs,
+                        "tensor_track": TensorTracker(
+                            shape=up_gate_proj_weight_shape,
+                            output_dim=False,
+                        ),
+                    }
+                    down_proj_attrs = {
+                        **extra_weight_attrs,
+                        "tensor_track": TensorTracker(
+                            shape=down_proj_weight_shape,
+                            output_dim=False,
+                        ),
+                    }
+                else:
+                    up_gate_proj_weight_shape = self.up_gate_proj_weight_shape
+                    up_gate_proj_scale_shape = self.up_gate_proj_scale_shape
+                    down_proj_weight_shape = self.down_proj_weight_shape
+                    down_proj_scale_shape = self.down_proj_scale_shape
+                    up_gate_proj_attrs = {
+                        **extra_weight_attrs,
+                        "SHARD_ID_TO_SHARDED_DIM": {"gate": 0, "down": 1, "up": 0},
+                    }
+                    down_proj_attrs = {
+                        **extra_weight_attrs,
+                        "SHARD_ID_TO_SHARDED_DIM": {"gate": 0, "down": 1, "up": 0},
+                    }
             else:
-                # v0 loader or torch model format
+                # v0 loader
                 up_gate_proj_weight_shape = self.up_gate_proj_weight_shape
                 up_gate_proj_scale_shape = self.up_gate_proj_scale_shape
                 down_proj_weight_shape = self.down_proj_weight_shape
                 down_proj_scale_shape = self.down_proj_scale_shape
+                up_gate_proj_attrs = {}
+                down_proj_attrs = {}
+
         self.weight_dtype = paddle.float8_e4m3fn
         self.added_scale_attrs = ["up_gate_proj_weight_scale_inv", "down_proj_weight_scale_inv"]
         up_gate_proj_weight_name = self.added_weight_attrs[0]
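Note: the "transpose [0,2,1]" comment refers to swapping the last two axes of a per-expert weight whose shape is [num_experts, dim_a, dim_b]; the `[:1] + [1:][::-1]` idiom performs exactly that permutation on a plain Python shape list. A minimal sketch with illustrative sizes:

import paddle

shape = [4, 8, 16]  # [num_experts, dim_a, dim_b], illustrative values only
transposed = shape[:1] + shape[1:][::-1]
assert transposed == [4, 16, 8]

# The same permutation applied to the tensor itself:
w = paddle.zeros(shape, dtype="float32")
assert paddle.transpose(w, perm=[0, 2, 1]).shape == transposed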
@@ -1295,20 +1326,20 @@ def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
         )
         set_weight_attrs(
             getattr(layer, up_gate_proj_weight_name),
-            extra_weight_attrs,
+            up_gate_proj_attrs,
         )
         set_weight_attrs(
             getattr(layer, up_gate_proj_scale_name),
-            extra_weight_attrs,
+            up_gate_proj_attrs,
         )

         set_weight_attrs(
             getattr(layer, down_proj_weight_name),
-            extra_weight_attrs,
+            down_proj_attrs,
         )
         set_weight_attrs(
             getattr(layer, down_proj_scale_name),
-            extra_weight_attrs,
+            down_proj_attrs,
         )

     def process_weights_after_loading(self, layer):
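Note: the point of this hunk is that the gate/up and down projections now receive distinct attribute dicts (up_gate_proj_attrs vs. down_proj_attrs) instead of one shared extra_weight_attrs, so each parameter carries its own tracking and shard metadata. A helper in the style of set_weight_attrs typically just attaches the metadata to the parameter object; the sketch below is an assumption about its shape, not FastDeploy's actual implementation:

def set_weight_attrs_sketch(param, weight_attrs):
    # Attach each metadata entry to the parameter so the weight loader can
    # read it back later (e.g. param.SHARD_ID_TO_SHARDED_DIM, param.tensor_track).
    if weight_attrs is None:
        return
    for key, value in weight_attrs.items():
        setattr(param, key, value)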
@@ -1385,6 +1416,13 @@ def _process_quantize(weight_idx):
         down_proj_weight_name = self.added_weight_attrs[1]
         up_gate_proj_scale_name = self.added_scale_attrs[0]
         down_proj_scale_name = self.added_scale_attrs[1]
+        if (
+            not weight_fully_copied(getattr(layer, up_gate_proj_weight_name))
+            or not weight_fully_copied(getattr(layer, down_proj_weight_name))
+            or not weight_fully_copied(getattr(layer, up_gate_proj_scale_name))
+            or not weight_fully_copied(getattr(layer, down_proj_scale_name))
+        ):
+            return
         process_weight_transpose(layer, up_gate_proj_weight_name)
         process_weight_transpose(layer, down_proj_weight_name)
         process_weight_transpose(layer, up_gate_proj_scale_name)
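Note: with the v1 loader, weights arrive shard by shard, so process_weights_after_loading can be invoked before the tensors are complete; the new guard returns early until every tracked weight and scale is fully copied, so the transpose/quantize steps run only once, on complete tensors. A sketch of the underlying idea (CopyTracker and its fields are assumptions for illustration, not the real TensorTracker/weight_fully_copied API):

import numpy as np

class CopyTracker:
    def __init__(self, shape):
        # Track which slices along dim 0 have been written by shard copies.
        self.copied = np.zeros(shape[0], dtype=bool)

    def mark(self, start, stop):
        self.copied[start:stop] = True

    def fully_copied(self):
        return bool(self.copied.all())

tracker = CopyTracker((8, 16))
tracker.mark(0, 4)
assert not tracker.fully_copied()  # only half the shards have landed: keep waiting
tracker.mark(4, 8)
assert tracker.fully_copied()      # now post-load processing may safely run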
