diff --git a/vllm_ascend/_310p/fused_moe/__init__.py b/vllm_ascend/_310p/fused_moe/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/vllm_ascend/_310p/fused_moe/experts_selector.py b/vllm_ascend/_310p/fused_moe/experts_selector.py
new file mode 100644
index 00000000000..71200c992de
--- /dev/null
+++ b/vllm_ascend/_310p/fused_moe/experts_selector.py
@@ -0,0 +1,75 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from collections.abc import Callable
+
+import torch
+
+from vllm_ascend.ops.fused_moe.experts_selector import _native_select_experts
+from vllm_ascend.utils import get_weight_prefetch_method
+
+
+def select_experts(
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    top_k: int,
+    use_grouped_topk: bool,
+    renormalize: bool,
+    topk_group: int | None = None,
+    num_expert_group: int | None = None,
+    custom_routing_function: Callable | None = None,
+    scoring_func: str = "softmax",
+    e_score_correction_bias: torch.Tensor | None = None,
+    global_num_experts: int = -1,
+):
+    """
+    Select the top-k experts for each token.
+
+    Args:
+        hidden_states: Hidden states of shape (num_tokens, hidden_size).
+        router_logits: Router logits of shape (num_tokens, num_experts).
+        top_k: Number of top-k experts to select per token.
+        use_grouped_topk: Whether to group experts before selecting top-k.
+        renormalize: Whether to renormalize the routing weights.
+        topk_group: Number of expert groups to select from.
+        num_expert_group: Number of experts in each group.
+        custom_routing_function: Custom routing function.
+        scoring_func: Scoring function to use.
+        e_score_correction_bias: Correction bias to apply to expert scores.
+        global_num_experts: Global number of experts.
+
+    Returns:
+        topk_weights: Router weights of shape (num_tokens, top_k).
+        topk_ids: Selected expert IDs of shape (num_tokens, top_k).
+    """
+    # Prefetch w1_w3_proj.weight before the routing computation.
+    weight_prefetch_method = get_weight_prefetch_method()
+    if weight_prefetch_method:
+        weight_prefetch_method.maybe_prefetch_moe_weight_preprocess(hidden_states, "gate_up")
+    topk_weights, topk_ids = _native_select_experts(
+        hidden_states=hidden_states,
+        router_logits=router_logits,
+        top_k=top_k,
+        use_grouped_topk=use_grouped_topk,
+        renormalize=renormalize,
+        topk_group=topk_group,
+        num_expert_group=num_expert_group,
+        custom_routing_function=custom_routing_function,
+        scoring_func=scoring_func,
+        e_score_correction_bias=e_score_correction_bias,
+        global_num_experts=global_num_experts,
+    )
+    return topk_weights, topk_ids
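
For orientation, in the default `scoring_func="softmax"` path with no grouping, the selection that this wrapper delegates to reduces to softmax-then-top-k. A minimal sketch of that semantics (illustrative only; `_native_select_experts` also handles the grouped-topk, custom-routing, and bias-correction branches, which are omitted here):

import torch

def softmax_topk(router_logits: torch.Tensor, top_k: int, renormalize: bool):
    # (num_tokens, num_experts) -> per-token routing probabilities
    scores = torch.softmax(router_logits, dim=-1)
    topk_weights, topk_ids = torch.topk(scores, top_k, dim=-1)
    if renormalize:
        # Rescale so the selected weights sum to 1 for every token.
        topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    return topk_weights, topk_ids
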
diff --git a/vllm_ascend/_310p/fused_moe/fused_moe.py b/vllm_ascend/_310p/fused_moe/fused_moe.py
new file mode 100644
index 00000000000..5cca50361e0
--- /dev/null
+++ b/vllm_ascend/_310p/fused_moe/fused_moe.py
@@ -0,0 +1,300 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from collections.abc import Callable
+
+import torch
+from vllm.distributed import get_dp_group, get_ep_group, get_tp_group
+from vllm.forward_context import get_forward_context
+from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE, UnquantizedFusedMoEMethod
+from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE
+
+from vllm_ascend.ascend_forward_context import MoECommType
+from vllm_ascend.ops.fused_moe.experts_selector import zero_experts_compute
+from vllm_ascend.ops.fused_moe.moe_comm_method import FusedExpertsResult, _MoECommMethods
+from vllm_ascend.quantization.methods.base import QuantType
+
+from .experts_selector import select_experts
+from .moe_comm_method import AllGatherCommImpl310
+
+
+class AscendUnquantizedFusedMoEMethod310(UnquantizedFusedMoEMethod):
+    def __init__(self, moe: FusedMoEConfig | None = None):
+        super().__init__(moe=moe)
+
+    def process_weights_after_loading(self, layer):
+        super().process_weights_after_loading(layer)
+
+        # Fused gate_up_proj (column parallel)
+        w13_data = self._maybe_pad_weight(layer.w13_weight.data).transpose(1, 2).contiguous()
+        layer.w13_weight = torch.nn.Parameter(w13_data, requires_grad=False)
+        # down_proj (row parallel)
+        w2_data = self._maybe_pad_weight(layer.w2_weight.data).transpose(1, 2).contiguous()
+        layer.w2_weight = torch.nn.Parameter(w2_data, requires_grad=False)
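
The two transposes above put the expert weights into the layout the grouped matmul consumes. A shape sketch, assuming the standard vLLM fused-MoE weight convention (dims illustrative):

# w13_weight as loaded:    (num_experts, 2 * intermediate_size, hidden_size)
# after transpose(1, 2):   (num_experts, hidden_size, 2 * intermediate_size)
# w2_weight as loaded:     (num_experts, hidden_size, intermediate_size)
# after transpose(1, 2):   (num_experts, intermediate_size, hidden_size)
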
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        use_grouped_topk: bool,
+        top_k: int,
+        router_logits: torch.Tensor,
+        renormalize: bool,
+        topk_group: int | None = None,
+        num_expert_group: int | None = None,
+        custom_routing_function: Callable | None = None,
+        scoring_func: str = "softmax",
+        routed_scaling_factor: float = 1.0,
+        e_score_correction_bias: torch.Tensor | None = None,
+        global_num_experts: int = -1,
+        expert_map: torch.Tensor | None = None,
+        apply_router_weight_on_input: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        zero_expert_num = getattr(layer, "zero_expert_num", 0)
+        zero_expert_type = getattr(layer, "zero_expert_type", None)
+        assert routed_scaling_factor == 1.0
+
+        topk_weights, topk_ids = select_experts(
+            hidden_states=x,
+            router_logits=router_logits,
+            top_k=top_k,
+            use_grouped_topk=use_grouped_topk,
+            renormalize=renormalize,
+            topk_group=topk_group,
+            num_expert_group=num_expert_group,
+            custom_routing_function=custom_routing_function,
+            scoring_func=scoring_func,
+            e_score_correction_bias=e_score_correction_bias,
+            global_num_experts=global_num_experts,
+        )
+
+        if zero_expert_num > 0 and zero_expert_type is not None:
+            topk_ids, topk_weights, zero_expert_result = zero_experts_compute(
+                expert_indices=topk_ids,
+                expert_scales=topk_weights,
+                num_experts=global_num_experts,
+                zero_expert_type=zero_expert_type,
+                hidden_states=x,
+            )
+
+        topk_weights = topk_weights.to(x.dtype)
+
+        moe_comm_method = get_forward_context().moe_comm_method
+        final_hidden_states = moe_comm_method.fused_experts(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            expert_map=expert_map,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+        )
+        if zero_expert_num > 0 and zero_expert_type is not None:
+            final_hidden_states += zero_expert_result
+        return final_hidden_states
+
+
+class AscendFusedMoE310(FusedMoE):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.global_num_experts = kwargs["num_experts"]
+
+        if self.quant_config is None:
+            self.quant_method = AscendUnquantizedFusedMoEMethod310(self.moe_config)
+        else:
+            self.quant_method = self.quant_config.get_quant_method(self, self.layer_name)
+
+        assert self.quant_method is not None
+
+        self.moe_config.tp_group = get_tp_group()
+        self.moe_config.dp_group = get_dp_group()
+        self.moe_config.ep_group = get_ep_group()
+        self.moe_config.supports_eplb = False
+
+        # Initialize the expert maps for expert parallelism.
+        self.global_expert_map = None
+        self.local_expert_map = None
+        if self.moe_config.ep_size > 1:
+            self.global_expert_map, self.local_expert_map = self.init_experts_map(self.moe_config)
+        self.local_num_experts = (
+            torch.sum(self.local_expert_map != -1).item()
+            if self.local_expert_map is not None
+            else self.global_num_experts
+        )
+
+        self.moe_config.num_experts = self.global_num_experts
+        self.moe_config.num_local_experts = self.local_num_experts
+        self.moe_config.global_redundant_expert_num = 0
+
+        moe_quant_params = {
+            "num_experts": self.local_num_experts,
+            "hidden_size": self.hidden_size,
+            "intermediate_size_per_partition": self.intermediate_size_per_partition,
+            "params_dtype": self.params_dtype,
+            "weight_loader": self.weight_loader,
+        }
+
+        self.quant_method.create_weights(layer=self, **moe_quant_params)
+        self.quant_type = self.get_quant_type()
+
+        _MoECommMethods[MoECommType.ALLGATHER] = AllGatherCommImpl310(self.moe_config)
+
+    def init_experts_map(self, moe_config):
+        """
+        Initialize the expert mapping for an expert-parallel MoE (Mixture of Experts) model.
+
+        This function creates mappings between global expert indices and local expert
+        indices for each rank in the expert parallel group. It divides the experts
+        evenly among the ranks and builds both global and local expert maps, which are
+        used during MoE computation to determine which experts are handled by which rank.
+
+        Args:
+            moe_config: Configuration object containing MoE parameters, including the
+                number of experts, the expert parallel size, and the expert parallel rank.
+
+        Returns:
+            tuple: A tuple containing:
+                - global_expert_map: Stacked expert maps for all ranks.
+                - local_expert_map: Expert map for the current rank (moved to NPU).
+        """
+        n_experts = moe_config.num_experts
+        ep_size = moe_config.ep_size
+        all_experts = torch.arange(n_experts, dtype=torch.int32)
+        experts_groups = all_experts.chunk(ep_size)
+        global_expert_map = []
+        local_expert_map = None
+        for rankid in range(ep_size):
+            expert_map = torch.full((n_experts,), -1, dtype=torch.int32)
+            local_experts = experts_groups[rankid]
+            expert_map[local_experts] = torch.arange(local_experts.shape[0], dtype=torch.int32)
+            global_expert_map.append(expert_map)
+            if rankid == moe_config.ep_rank:
+                local_expert_map = expert_map.npu()
+        return torch.stack(global_expert_map), local_expert_map
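
To make the mapping concrete, here is what the loop above produces for a hypothetical 4-expert, 2-rank configuration (a CPU sketch of the same logic, without the `.npu()` transfer):

import torch

n_experts, ep_size = 4, 2
groups = torch.arange(n_experts, dtype=torch.int32).chunk(ep_size)
maps = []
for rank, local_experts in enumerate(groups):
    m = torch.full((n_experts,), -1, dtype=torch.int32)
    m[local_experts] = torch.arange(len(local_experts), dtype=torch.int32)
    maps.append(m)
# rank 0: tensor([ 0,  1, -1, -1])  -> global experts 0,1 live here as local 0,1
# rank 1: tensor([-1, -1,  0,  1])  -> global experts 2,3 live here as local 0,1
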
+    def get_quant_type(self) -> QuantType:
+        quant_method = self.quant_method
+        if not hasattr(quant_method, "quant_method") or quant_method.quant_method is None:
+            return QuantType.NONE
+
+        method = quant_method.quant_method
+        quant_type = getattr(method, "quant_type", QuantType.NONE)
+        if quant_type != QuantType.NONE:
+            # TODO: w8a8 quantization will be supported soon; until then, reject every quantized method here.
+            raise RuntimeError("Quantized fused MoE is not supported on 310P currently.")
+        return QuantType.NONE
+
+    def forward_impl(  # type: ignore[override]
+        self, hidden_states: torch.Tensor, router_logits: torch.Tensor
+    ) -> torch.Tensor:
+        assert self.quant_method is not None
+        forward_context = get_forward_context()
+
+        hidden_states, router_logits, _, context_metadata = forward_context.moe_comm_method.prepare(
+            hidden_states=hidden_states, router_logits=router_logits, quant_type=self.quant_type
+        )
+
+        if isinstance(hidden_states, tuple):
+            hidden_states, pertoken_scale = hidden_states
+        else:
+            pertoken_scale = None
+
+        # Matrix multiply.
+        fused_experts_results: FusedExpertsResult = self.quant_method.apply(
+            layer=self,
+            x=hidden_states,
+            router_logits=router_logits,
+            pertoken_scale=pertoken_scale,
+            top_k=self.top_k,
+            renormalize=self.renormalize,
+            use_grouped_topk=self.use_grouped_topk,
+            global_num_experts=self.global_num_experts,
+            expert_map=self.local_expert_map,
+            topk_group=self.topk_group,
+            num_expert_group=self.num_expert_group,
+            custom_routing_function=self.custom_routing_function,
+            scoring_func=self.scoring_func,
+            routed_scaling_factor=self.routed_scaling_factor,
+            e_score_correction_bias=self.e_score_correction_bias,
+            activation=self.activation,
+            apply_router_weight_on_input=self.apply_router_weight_on_input,
+        )
+
+        routed_out = forward_context.moe_comm_method.finalize(
+            hidden_states=fused_experts_results.routed_out,
+            reduce_results=self.reduce_results,
+            context_metadata=context_metadata,
+        )
+
+        return routed_out
+
+
+class AscendSharedFusedMoE310(SharedFusedMoE, AscendFusedMoE310):
+    def __init__(
+        self,
+        shared_experts: torch.nn.Module,
+        gate: torch.nn.Module | None = None,
+        use_overlapped: bool = True,
+        **kwargs,
+    ):
+        AscendFusedMoE310.__init__(self, **kwargs)
+        self._shared_experts = shared_experts
+        self.use_overlapped = use_overlapped
+        self.shared_expert_stream = None
+        self._gate = gate
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if self._shared_experts is None:
+            fused_out = AscendFusedMoE310.forward(
+                self,
+                hidden_states=hidden_states,
+                router_logits=router_logits,
+            )
+            shared_out = None
+            return shared_out, fused_out
+        shared_out, fused_out = AscendFusedMoE310.forward(
+            self,
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+        )
+        return shared_out, fused_out
+
+    def _forward_shared_experts(self, hidden_states: torch.Tensor):
+        if self._shared_experts is None:
+            return None
+        part1_out = self._shared_experts_part1(hidden_states)
+        shared_out = self._shared_experts_part2(hidden_states, part1_out)
+        return shared_out
+
+    def forward_impl(  # type: ignore[override]
+        self, hidden_states: torch.Tensor, router_logits: torch.Tensor
+    ):
+        routed_out = AscendFusedMoE310.forward_impl(
+            self,
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+        )
+        if self._shared_experts is None:
+            return routed_out
+        shared_out = self._forward_shared_experts(hidden_states)
+        return shared_out, routed_out
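
Call-shape summary for the shared variant (an illustrative sketch; the constructor arguments shown are placeholders for the usual FusedMoE kwargs):

# moe = AscendSharedFusedMoE310(shared_experts=shared_mlp, gate=None, num_experts=..., ...)
# shared_out, routed_out = moe(hidden_states, router_logits)
#
# When shared_experts is None, forward() still returns a 2-tuple, with
# shared_out == None, so callers can unpack unconditionally.
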
diff --git a/vllm_ascend/_310p/fused_moe/moe_comm_method.py b/vllm_ascend/_310p/fused_moe/moe_comm_method.py
new file mode 100644
index 00000000000..36fadf27b92
--- /dev/null
+++ b/vllm_ascend/_310p/fused_moe/moe_comm_method.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+from __future__ import annotations
+
+from vllm_ascend.ops.fused_moe.moe_comm_method import AllGatherCommImpl
+
+from .token_dispatcher import TokenDispatcherWithAllGather310
+
+
+class AllGatherCommImpl310(AllGatherCommImpl):
+    """All-gather MoE communication for the 310P.
+
+    This implementation is the same as AllGatherCommImpl, except that it
+    swaps in TokenDispatcherWithAllGather310, which replaces the
+    `torch_npu.npu_moe_init_routing_v2` pre-processing with a native torch
+    implementation of the token-to-expert routing; the inherited
+    post-processing (`torch_npu.npu_moe_token_unpermute`) is unchanged.
+    It is the default (and only) MoE communication method on 310P.
+    """
+
+    def _get_token_dispatcher(self):
+        return TokenDispatcherWithAllGather310(
+            top_k=self.moe_config.experts_per_token,
+            num_experts=self.moe_config.num_experts,
+            num_local_experts=self.moe_config.num_local_experts,
+        )
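
The unpermute/combine semantics referenced in the docstring can be pictured with plain torch ops (a behavioral sketch only, not the NPU kernel; shapes follow the dispatcher defined below):

import torch

def token_unpermute_sketch(expert_out, expanded_row_idx, topk_weights):
    # expert_out:       (num_routed, H) rows of expert output, grouped by expert
    # expanded_row_idx: (num_tokens * k,) slot -> row in expert_out, -1 if dropped
    # topk_weights:     (num_tokens, k) routing weights
    num_tokens, k = topk_weights.shape
    hidden = expert_out.shape[-1]
    gathered = expert_out.new_zeros(num_tokens * k, hidden)
    valid = expanded_row_idx >= 0
    gathered[valid] = expert_out[expanded_row_idx[valid].long()]
    # Weighted sum over the k expert copies of every token.
    return (gathered.view(num_tokens, k, hidden) * topk_weights.unsqueeze(-1)).sum(dim=1)
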
diff --git a/vllm_ascend/_310p/fused_moe/token_dispatcher.py b/vllm_ascend/_310p/fused_moe/token_dispatcher.py
new file mode 100644
index 00000000000..00c611cf29c
--- /dev/null
+++ b/vllm_ascend/_310p/fused_moe/token_dispatcher.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+# Copyright 2023 DeepSeek-AI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from vllm.distributed.parallel_state import get_ep_group
+
+from vllm_ascend.ops.fused_moe.token_dispatcher import TokenDispatcherWithAllGather, TokenDispatchResult
+
+
+class TokenDispatcherWithAllGather310(TokenDispatcherWithAllGather):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def token_dispatch(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        expert_map: torch.Tensor | None = None,
+        global_redundant_expert_num: int = 0,
+        mc2_mask: torch.Tensor | None = None,
+        apply_router_weight_on_input: bool = False,
+        with_quant: bool = False,
+        dynamic_eplb: bool = False,
+        pertoken_scale: torch.Tensor | None = None,
+    ):
+        if with_quant:
+            raise RuntimeError("Quant is not supported for 310P currently.")
+        self.original_shape = hidden_states.shape
+
+        num_tokens = hidden_states.shape[:-1].numel()
+        self.apply_router_weight_on_input = apply_router_weight_on_input
+        if self.apply_router_weight_on_input:
+            assert topk_weights.dim() == 2, "`topk_weights` should be in shape (num_tokens, topk)"
+            _, topk = topk_weights.shape
+            assert topk == 1, "Only topk=1 is supported when `apply_router_weight_on_input` is True"
+            hidden_states = hidden_states * topk_weights.to(hidden_states.dtype)
+        if expert_map is not None:
+            mask = expert_map[topk_ids] != -1
+            topk_weights = topk_weights * mask
+            first_expert_idx = get_ep_group().rank_in_group * self.num_experts_local
+            last_expert_idx = first_expert_idx + self.num_experts_local
+        else:
+            first_expert_idx = 0
+            last_expert_idx = self.num_experts_local
+
+        sorted_hidden_states, expanded_row_idx, expert_tokens = self.moe_init_routing(
+            hidden_states,
+            topk_ids,
+            active_num=num_tokens * self.top_k,
+            active_expert_range=[first_expert_idx, last_expert_idx],
+        )
+        expert_tokens = expert_tokens.to(torch.int64)
+        group_list_type = 1  # `count` mode
+        context_metadata = {"topk_weights": topk_weights, "expanded_row_idx": expanded_row_idx}
+
+        return TokenDispatchResult(
+            hidden_states=sorted_hidden_states,
+            dynamic_scale=None,
+            group_list=expert_tokens,
+            group_list_type=group_list_type,
+            context_metadata=context_metadata,
+        )
+
+    def moe_init_routing(self, x, expert_idx, active_num, active_expert_range):
+        """
+        Initialize MoE routing by sorting tokens according to their assigned experts
+        and preparing the data structures needed for efficient per-expert computation.
+
+        Args:
+            x (torch.Tensor): Input tensor of token representations.
+            expert_idx (torch.Tensor): Expert indices assigned to each token, of
+                shape (num_tokens, k).
+            active_num (int): Maximum number of routed token rows to keep (typically
+                num_tokens * top_k), or None to keep every row that falls inside
+                active_expert_range.
+            active_expert_range (tuple): Half-open range [start, end) of the experts
+                that are local to this rank.
+
+        Returns:
+            tuple: A tuple containing:
+                - expanded_x: Rows of the input gathered in expert-sorted order.
+                - expanded_row_idx: Mapping from (token, k) slots to rows of
+                  expanded_x (-1 for tokens routed to non-local experts).
+                - expert_tokens_count: Number of tokens assigned to each local expert.
+        """
+        MAX_INT32 = torch.iinfo(torch.int32).max
+        expert_start, expert_end = active_expert_range
+        num_rows = x.shape[0]
+        k = expert_idx.shape[-1]
+        expert_idx_flat = expert_idx.flatten()
+        mask = (expert_idx_flat >= expert_start) & (expert_idx_flat < expert_end)
+        actual_expert_total_num = mask.sum().item()
+        # Push out-of-range assignments to the end of the stable sort.
+        expert_idx_flat = torch.where(
+            ~mask, torch.full_like(expert_idx_flat, MAX_INT32, dtype=torch.int32), expert_idx_flat
+        )
+        sorted_idx = torch.argsort(expert_idx_flat, stable=True)
+        sorted_expert_idx = expert_idx_flat[sorted_idx]
+        expanded_row_idx = torch.full((num_rows * k,), -1, dtype=torch.int32, device=expert_idx.device)
+        expanded_row_idx[sorted_idx[:actual_expert_total_num]] = torch.arange(
+            actual_expert_total_num, dtype=torch.int32, device=expert_idx.device
+        )
+        expert_tokens_count = torch.bincount(
+            sorted_expert_idx[:actual_expert_total_num] - expert_start, minlength=expert_end - expert_start
+        )
+        active_num = min(active_num or actual_expert_total_num, actual_expert_total_num)
+        expanded_x = x[sorted_idx[:active_num] // k]
+
+        return expanded_x, expanded_row_idx, expert_tokens_count
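
A worked example of the routing above, for a hypothetical rank that owns experts [0, 2) with 3 tokens and k=2:

import torch

x = torch.arange(6.0).view(3, 2)  # 3 tokens, hidden=2
expert_idx = torch.tensor([[0, 2], [1, 0], [3, 1]], dtype=torch.int32)
# Local range [0, 2): assignments to experts 2 and 3 are masked to MAX_INT32.
# The stable sort of the flat ids [0, MAX, 1, 0, MAX, 1] yields slots [0, 3, 2, 5, ...]:
#   expanded_x rows     -> [t0 (e0), t1 (e0), t1 (e1), t2 (e1)]
#   expanded_row_idx    -> [0, -1, 2, 1, -1, 3]
#   expert_tokens_count -> [2, 2]
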
diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py
index 26604f0bb00..c0ca0954215 100644
--- a/vllm_ascend/ascend_forward_context.py
+++ b/vllm_ascend/ascend_forward_context.py
@@ -218,6 +218,7 @@ def select_moe_comm_method(num_tokens: int, vllm_config: VllmConfig, is_draft_mo
        4. On A3 with expert parallel, prefer fused MC2 when using w8a8_dynamic
           quantization with small EP size, no dynamic_eplb, and not in MTP mode;
           otherwise use MC2 within capacity or all-to-all.
+       5. On 310P, always use all-gather.
 
     Args:
         num_tokens (int): The number of tokens in the current batch.
@@ -272,7 +273,8 @@ def select_moe_comm_method(num_tokens: int, vllm_config: VllmConfig, is_draft_mo
         elif envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 2:
             fused_prefill_enable = False
         moe_comm_type = MoECommType.FUSED_MC2 if fused_prefill_enable else MoECommType.ALLTOALL
-
+    elif soc_version in {AscendDeviceType._310P}:
+        moe_comm_type = MoECommType.ALLGATHER
     else:
         raise ValueError(f"Unsupported soc_version: {soc_version}")
     return moe_comm_type
diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index d301b402637..edec0802ed6 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -81,9 +81,8 @@ def process_weights_after_loading(self, layer):
                                                        1, 2).contiguous()
         layer.w2_weight = torch.nn.Parameter(w2_data, requires_grad=False)
 
-        if get_ascend_device_type() != AscendDeviceType._310P:
-            layer.w13_weight.data = maybe_trans_nz(layer.w13_weight.data)
-            layer.w2_weight.data = maybe_trans_nz(layer.w2_weight.data)
+        layer.w13_weight.data = maybe_trans_nz(layer.w13_weight.data)
+        layer.w2_weight.data = maybe_trans_nz(layer.w2_weight.data)
 
     def apply(self,
               layer: torch.nn.Module,
diff --git a/vllm_ascend/ops/fused_moe/moe_mlp.py b/vllm_ascend/ops/fused_moe/moe_mlp.py
index d102a1d5a35..e29945ea13f 100644
--- a/vllm_ascend/ops/fused_moe/moe_mlp.py
+++ b/vllm_ascend/ops/fused_moe/moe_mlp.py
@@ -291,11 +291,7 @@ def unquant_apply_mlp(hidden_states: torch.Tensor,
         group_type=0,
         group_list=group_list,
     )[0]
-    if get_ascend_device_type() == AscendDeviceType._310P:
-        gate_up_out = torch_npu.npu_swiglu(gate_up_out.to(torch.float32)).to(
-            torch.float16)
-    else:
-        gate_up_out = torch_npu.npu_swiglu(gate_up_out)
+    gate_up_out = torch_npu.npu_swiglu(gate_up_out)
 
     if topk_scales is not None:
         gate_up_out *= topk_scales
diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py
index b4da71f391f..31f1a8da740 100644
--- a/vllm_ascend/ops/rotary_embedding.py
+++ b/vllm_ascend/ops/rotary_embedding.py
@@ -190,8 +190,7 @@ def _rope_forward_oot(
     cos, sin = get_cos_and_sin_slice()
     # adopt custom kernel path for rotary_embedding
     if _custom_rotary_embedding_enabled(
-            query, is_neox_style, self.head_size) and get_ascend_device_type(
-            ) != AscendDeviceType._310P:
+            query, is_neox_style, self.head_size):
         query, key = torch.ops._C_ascend.rotary_embedding(
             positions,
             query,
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
index 9aadfb66da9..2f15016057f 100644
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -625,6 +625,7 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
 
     # 310P: override selected ops with 310P implementations (keep minimal changes outside _310p)
     if is_310p():
+        from vllm_ascend._310p.fused_moe.fused_moe import AscendFusedMoE310, AscendSharedFusedMoE310
         from vllm_ascend._310p.ops.activation import AscendSiluAndMul310
         from vllm_ascend._310p.ops.layernorm import AscendGemmaRMSNorm310, AscendRMSNorm310
         from vllm_ascend._310p.ops.mm_encoder_attention import AscendMMEncoderAttention310
@@ -637,6 +638,8 @@ def register_ascend_customop(vllm_config: VllmConfig | None = None):
             "RotaryEmbedding": AscendRotaryEmbedding310,
             "RMSNorm": AscendRMSNorm310,
             "GemmaRMSNorm": AscendGemmaRMSNorm310,
+            "FusedMoE": AscendFusedMoE310,
+            "SharedFusedMoE": AscendSharedFusedMoE310,
         }
     )
 