diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 07a9a0a8b522..0f2800149726 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1217,6 +1217,7 @@ def should_moe_wna16_use_cuda(
 ):
     return (
         current_platform.is_cuda()
+        and not current_platform.is_rocm()
         and bit == 4
         and group_size in [32, 64, 128]
         and num_valid_tokens / num_experts <= 6
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index ab4c3e0740a9..2c30d54de6fd 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -336,6 +336,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
         "bitsandbytes",
+        "moe_wna16",
     ]
 
     @classmethod