From dd9f82229023a4df8d281909e14dca12efeca084 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 10:25:30 +0800 Subject: [PATCH 01/10] add 310p ut Signed-off-by: pu-zhe --- tests/ut/attention/test_attention_v1.py | 8 +-- tests/ut/ops/test_activation.py | 73 ++++++++++++++++--------- tests/ut/ops/test_layernorm.py | 73 +++++++++++++------------ 3 files changed, 89 insertions(+), 65 deletions(-) diff --git a/tests/ut/attention/test_attention_v1.py b/tests/ut/attention/test_attention_v1.py index 3f6316bae22..bd0f5988d88 100644 --- a/tests/ut/attention/test_attention_v1.py +++ b/tests/ut/attention/test_attention_v1.py @@ -41,9 +41,7 @@ def test_get_builder_cls(self): self.assertEqual(AscendAttentionBackend.get_builder_cls(), AscendAttentionMetadataBuilder) - @patch('vllm_ascend.utils.get_ascend_device_type', - return_value=AscendDeviceType.A3) - def test_get_kv_cache_shape_not_310p(self, mock_soc_version): + def test_get_kv_cache_shape_not(self): result = AscendAttentionBackend.get_kv_cache_shape(10, 20, 30, 40) self.assertEqual(result, (2, 10, 20, 30, 40)) @@ -92,9 +90,7 @@ def test_reorder_batch(self): self.assertFalse(result) @patch('vllm_ascend.attention.attention_v1.AscendMetadata') - @patch('vllm_ascend.utils.get_ascend_device_type', - return_value=AscendDeviceType.A3) - def test_build_non_310p(self, mock_soc_version, mock_ascend_metadata): + def test_build(self, mock_ascend_metadata): common_attn_metadata = AscendCommonAttentionMetadata( query_start_loc=torch.tensor([0, 2, 5, 9]), query_start_loc_cpu=torch.tensor([0, 2, 5, 9]), diff --git a/tests/ut/ops/test_activation.py b/tests/ut/ops/test_activation.py index 63ddb018e68..43a8f3150c1 100644 --- a/tests/ut/ops/test_activation.py +++ b/tests/ut/ops/test_activation.py @@ -52,8 +52,8 @@ def test_QuickGELU_forward(mock_gelu, dummy_tensor, default_vllm_config): mock_gelu.assert_called_once() -@pytest.mark.skipif(is_310p_hw(), reason="310P operator classes have already been refactored.") -@pytest.mark.parametrize("is_310p", [True, False]) +@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.") +@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3) @patch("torch_npu.npu_swiglu", side_effect=lambda x: x + 1) @patch("torch.ops.vllm.maybe_wait_prefetch_done", side_effect=lambda x: None) @patch("torch.ops.vllm.maybe_prefetch_mlp_down_proj", side_effect=lambda x: None) @@ -61,36 +61,59 @@ def test_SiluAndMul_forward( mock_maybe_prefetch_mlp_down_proj, mock_maybe_wait_prefetch_done, mock_swiglu, - is_310p, + device_type, dummy_tensor, default_vllm_config, ): - if is_310p and (not is_310p_hw()): - pytest.skip("Pseudo-310P param case is not valid on non-310P CI after refactor.") + layer = SiluAndMul() + out = layer.forward(dummy_tensor) + expected_arg = dummy_tensor + + # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 + mock_maybe_prefetch_mlp_down_proj.assert_called_once() + + # assert mock_swiglu.call_count == 1 + mock_swiglu.assert_called_once() - with patch( - "vllm_ascend.utils.get_ascend_device_type", - return_value=AscendDeviceType._310P if is_310p else AscendDeviceType.A3, - ): - layer = SiluAndMul() - out = layer.forward(dummy_tensor) + # assert mock_maybe_wait_prefetch_done.call_count == 1 + mock_maybe_wait_prefetch_done.assert_called_once() - if is_310p: - expected_arg = dummy_tensor.to(torch.float32) - else: - expected_arg = dummy_tensor + actual_arg = mock_swiglu.call_args[0][0] + assert torch.allclose(actual_arg, expected_arg), "npu_swiglu called with unexpected input" + + expected_out = dummy_tensor + 1 + assert torch.allclose(out, expected_out) + + +@pytest.mark.skipif(not is_310p_hw(), reason="310P device unittest case.") +@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) +@patch("torch.nn.functional.silu", side_effect=lambda x: x + 1) +@patch("torch.ops.vllm.maybe_wait_prefetch_done", side_effect=lambda x: None) +@patch("torch.ops.vllm.maybe_prefetch_mlp_down_proj", side_effect=lambda x: None) +def test_SiluAndMul_forward( + mock_maybe_prefetch_mlp_down_proj, + mock_maybe_wait_prefetch_done, + mock_swiglu, + device_type, + dummy_tensor, + default_vllm_config, +): + layer = SiluAndMul() + out = layer.forward(dummy_tensor) + expected_arg = dummy_tensor - # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 - mock_maybe_prefetch_mlp_down_proj.assert_called_once() + # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 + mock_maybe_prefetch_mlp_down_proj.assert_called_once() - # assert mock_swiglu.call_count == 1 - mock_swiglu.assert_called_once() + # assert mock_swiglu.call_count == 1 + mock_swiglu.assert_called_once() - # assert mock_maybe_wait_prefetch_done.call_count == 1 - mock_maybe_wait_prefetch_done.assert_called_once() + # assert mock_maybe_wait_prefetch_done.call_count == 1 + mock_maybe_wait_prefetch_done.assert_called_once() - actual_arg = mock_swiglu.call_args[0][0] - assert torch.allclose(actual_arg, expected_arg), "npu_swiglu called with unexpected input" + actual_arg = mock_swiglu.call_args[0][0] + assert torch.allclose(actual_arg, expected_arg), "swiglu called with unexpected input" - expected_out = dummy_tensor + 1 - assert torch.allclose(out, expected_out) + h = dummy_tensor.shape[-1] // 2 + expected_out = (dummy_tensor[..., :h] + 1) * dummy_tensor[..., h:] + assert torch.allclose(out, expected_out) \ No newline at end of file diff --git a/tests/ut/ops/test_layernorm.py b/tests/ut/ops/test_layernorm.py index 6805ef0675e..5b2ce5ee403 100644 --- a/tests/ut/ops/test_layernorm.py +++ b/tests/ut/ops/test_layernorm.py @@ -40,43 +40,48 @@ def default_vllm_config(): yield mock_config -@pytest.mark.skipif(is_310p_hw(), reason="310P operator classes have already been refactored.") -@pytest.mark.parametrize("is_310p", [True, False]) +@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.") @pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) +@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) @patch("torch_npu.npu_add_rms_norm", side_effect=mock_add_rms_norm) @patch("torch.ops._C_ascend.npu_add_rms_norm_bias", side_effect=mock_add_rms_norm_bias) def test_RMSNorm_forward( - mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, is_310p, residual, dummy_tensor, default_vllm_config + mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config ): - if is_310p and (not is_310p_hw()): - pytest.skip("Pseudo-310P branch is invalid on non-310P CI after refactor.") - - with patch( - "vllm_ascend.utils.get_ascend_device_type", - return_value=AscendDeviceType._310P if is_310p else AscendDeviceType.A3, - ): - layer = RMSNorm(hidden_size=8, eps=1e-05) - if residual is not None: - out_x, out_residual = layer.forward_oot(dummy_tensor, residual) - - if is_310p: - expected_arg_x = dummy_tensor + residual.to(dummy_tensor.dtype) - expected_out_x = expected_arg_x + 1 - expected_out_residual = expected_arg_x.to(residual.dtype) - - mock_rmsnorm.assert_called_once() - assert torch.allclose(out_x, expected_out_x) - assert torch.allclose(out_residual, expected_out_residual) - else: - expected_out_x = 2 * dummy_tensor - expected_out_residual = 2 * residual - mock_add_rms_norm_bias.assert_called_once() - assert torch.allclose(out_x, expected_out_x) - assert torch.allclose(out_residual, expected_out_residual) - else: - out_x = layer.forward_oot(dummy_tensor, residual) - expected_out_x = dummy_tensor + 1 - - mock_rmsnorm.assert_called_once() - assert torch.allclose(out_x, expected_out_x) + layer = RMSNorm(hidden_size=8, eps=1e-05) + if residual is not None: + out_x, out_residual = layer.forward_oot(dummy_tensor, residual) + expected_out_x = 2 * dummy_tensor + expected_out_residual = 2 * residual + mock_add_rms_norm_bias.assert_called_once() + assert torch.allclose(out_x, expected_out_x) + assert torch.allclose(out_residual, expected_out_residual) + else: + out_x = layer.forward_oot(dummy_tensor, residual) + expected_out_x = dummy_tensor + 1 + + mock_rmsnorm.assert_called_once() + assert torch.allclose(out_x, expected_out_x) + + +@pytest.mark.skipif(not is_310p_hw(), reason="310P device unittest case.") +@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) +@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) +@patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) +def test_RMSNorm_forward( + mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config +): + layer = RMSNorm(hidden_size=8, eps=1e-05) + if residual is not None: + out_x, out_residual = layer.forward_oot(dummy_tensor, residual) + expected_out_x = dummy_tensor + residual + expected_out_residual = expected_out_x + 1 + mock_rmsnorm.assert_called_once() + assert torch.allclose(out_x, expected_out_x) + assert torch.allclose(out_residual, expected_out_residual) + else: + out_x = layer.forward_oot(dummy_tensor, residual) + expected_out_x = dummy_tensor + 1 + mock_rmsnorm.assert_called_once() + assert torch.allclose(out_x, expected_out_x) \ No newline at end of file From a397690341ba6f34ca41de1698e38c967d4b2aef Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 10:31:58 +0800 Subject: [PATCH 02/10] sign-off Signed-off-by: pu-zhe From dbfad75b45dbe838cb9a01d3c258910d96a68259 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 10:38:45 +0800 Subject: [PATCH 03/10] cleancode Signed-off-by: pu-zhe --- tests/ut/ops/test_activation.py | 10 +++++----- tests/ut/ops/test_layernorm.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/ut/ops/test_activation.py b/tests/ut/ops/test_activation.py index 43a8f3150c1..3c4b5c3507a 100644 --- a/tests/ut/ops/test_activation.py +++ b/tests/ut/ops/test_activation.py @@ -90,28 +90,28 @@ def test_SiluAndMul_forward( @patch("torch.nn.functional.silu", side_effect=lambda x: x + 1) @patch("torch.ops.vllm.maybe_wait_prefetch_done", side_effect=lambda x: None) @patch("torch.ops.vllm.maybe_prefetch_mlp_down_proj", side_effect=lambda x: None) -def test_SiluAndMul_forward( +def test_SiluAndMul_forward_310p( mock_maybe_prefetch_mlp_down_proj, mock_maybe_wait_prefetch_done, - mock_swiglu, + mock_silu, device_type, dummy_tensor, default_vllm_config, ): layer = SiluAndMul() out = layer.forward(dummy_tensor) - expected_arg = dummy_tensor + expected_arg = dummy_tensor[..., :h] # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 mock_maybe_prefetch_mlp_down_proj.assert_called_once() # assert mock_swiglu.call_count == 1 - mock_swiglu.assert_called_once() + mock_silu.assert_called_once() # assert mock_maybe_wait_prefetch_done.call_count == 1 mock_maybe_wait_prefetch_done.assert_called_once() - actual_arg = mock_swiglu.call_args[0][0] + actual_arg = mock_silu.call_args[0][0] assert torch.allclose(actual_arg, expected_arg), "swiglu called with unexpected input" h = dummy_tensor.shape[-1] // 2 diff --git a/tests/ut/ops/test_layernorm.py b/tests/ut/ops/test_layernorm.py index 5b2ce5ee403..0e0317f0533 100644 --- a/tests/ut/ops/test_layernorm.py +++ b/tests/ut/ops/test_layernorm.py @@ -69,7 +69,7 @@ def test_RMSNorm_forward( @pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) -def test_RMSNorm_forward( +def test_RMSNorm_forward_310p( mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config ): layer = RMSNorm(hidden_size=8, eps=1e-05) From faaf0196b221f6b7488a6dc4c9fdc40dc5c13d02 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 10:42:09 +0800 Subject: [PATCH 04/10] cleancode Signed-off-by: pu-zhe --- tests/ut/ops/test_activation.py | 2 +- tests/ut/ops/test_layernorm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ut/ops/test_activation.py b/tests/ut/ops/test_activation.py index 3c4b5c3507a..c490e434cb8 100644 --- a/tests/ut/ops/test_activation.py +++ b/tests/ut/ops/test_activation.py @@ -105,7 +105,7 @@ def test_SiluAndMul_forward_310p( # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 mock_maybe_prefetch_mlp_down_proj.assert_called_once() - # assert mock_swiglu.call_count == 1 + # assert mock_silu.call_count == 1 mock_silu.assert_called_once() # assert mock_maybe_wait_prefetch_done.call_count == 1 diff --git a/tests/ut/ops/test_layernorm.py b/tests/ut/ops/test_layernorm.py index 0e0317f0533..fc23f5656cc 100644 --- a/tests/ut/ops/test_layernorm.py +++ b/tests/ut/ops/test_layernorm.py @@ -70,7 +70,7 @@ def test_RMSNorm_forward( @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) def test_RMSNorm_forward_310p( - mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config + mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config ): layer = RMSNorm(hidden_size=8, eps=1e-05) if residual is not None: From 9f7a76b5303d5849119ea39bf790839167111864 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 11:22:57 +0800 Subject: [PATCH 05/10] cleancode Signed-off-by: pu-zhe --- tests/ut/ops/test_activation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ut/ops/test_activation.py b/tests/ut/ops/test_activation.py index c490e434cb8..a8f0f314b8a 100644 --- a/tests/ut/ops/test_activation.py +++ b/tests/ut/ops/test_activation.py @@ -100,6 +100,7 @@ def test_SiluAndMul_forward_310p( ): layer = SiluAndMul() out = layer.forward(dummy_tensor) + h = dummy_tensor.shape[-1] // 2 expected_arg = dummy_tensor[..., :h] # assert mock_maybe_prefetch_mlp_down_proj.call_count == 1 @@ -114,6 +115,5 @@ def test_SiluAndMul_forward_310p( actual_arg = mock_silu.call_args[0][0] assert torch.allclose(actual_arg, expected_arg), "swiglu called with unexpected input" - h = dummy_tensor.shape[-1] // 2 expected_out = (dummy_tensor[..., :h] + 1) * dummy_tensor[..., h:] assert torch.allclose(out, expected_out) \ No newline at end of file From b9df53ebaca23bfa32bd681b2bd246e3d3b9fef6 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 11:38:40 +0800 Subject: [PATCH 06/10] trigger Signed-off-by: pu-zhe From db7a58912c81132f73d86ce37c46e1597ef7210d Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 12:12:32 +0800 Subject: [PATCH 07/10] trigger Signed-off-by: pu-zhe From d2d12f18eb32e84c3a068885022801e4ad29114d Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 13:59:07 +0800 Subject: [PATCH 08/10] trigger Signed-off-by: pu-zhe From 67c62a7d2e406273d221694352beef6e34d97527 Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 14:14:01 +0800 Subject: [PATCH 09/10] bugfix Signed-off-by: pu-zhe --- tests/ut/ops/test_layernorm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ut/ops/test_layernorm.py b/tests/ut/ops/test_layernorm.py index fc23f5656cc..1ec7043bf9e 100644 --- a/tests/ut/ops/test_layernorm.py +++ b/tests/ut/ops/test_layernorm.py @@ -66,7 +66,7 @@ def test_RMSNorm_forward( @pytest.mark.skipif(not is_310p_hw(), reason="310P device unittest case.") -@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) +@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float16)]) @patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) def test_RMSNorm_forward_310p( @@ -75,8 +75,8 @@ def test_RMSNorm_forward_310p( layer = RMSNorm(hidden_size=8, eps=1e-05) if residual is not None: out_x, out_residual = layer.forward_oot(dummy_tensor, residual) - expected_out_x = dummy_tensor + residual - expected_out_residual = expected_out_x + 1 + expected_out_residual = dummy_tensor + residual + expected_out_x = expected_out_residual + 1 mock_rmsnorm.assert_called_once() assert torch.allclose(out_x, expected_out_x) assert torch.allclose(out_residual, expected_out_residual) From 48da5808330d63fa21d332c83a144ea452b86a4d Mon Sep 17 00:00:00 2001 From: pu-zhe Date: Tue, 27 Jan 2026 15:27:09 +0800 Subject: [PATCH 10/10] bugfix Signed-off-by: pu-zhe --- tests/ut/ops/test_activation.py | 4 ---- tests/ut/ops/test_layernorm.py | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/ut/ops/test_activation.py b/tests/ut/ops/test_activation.py index a8f0f314b8a..d05c7df128d 100644 --- a/tests/ut/ops/test_activation.py +++ b/tests/ut/ops/test_activation.py @@ -53,7 +53,6 @@ def test_QuickGELU_forward(mock_gelu, dummy_tensor, default_vllm_config): @pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.") -@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3) @patch("torch_npu.npu_swiglu", side_effect=lambda x: x + 1) @patch("torch.ops.vllm.maybe_wait_prefetch_done", side_effect=lambda x: None) @patch("torch.ops.vllm.maybe_prefetch_mlp_down_proj", side_effect=lambda x: None) @@ -61,7 +60,6 @@ def test_SiluAndMul_forward( mock_maybe_prefetch_mlp_down_proj, mock_maybe_wait_prefetch_done, mock_swiglu, - device_type, dummy_tensor, default_vllm_config, ): @@ -86,7 +84,6 @@ def test_SiluAndMul_forward( @pytest.mark.skipif(not is_310p_hw(), reason="310P device unittest case.") -@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) @patch("torch.nn.functional.silu", side_effect=lambda x: x + 1) @patch("torch.ops.vllm.maybe_wait_prefetch_done", side_effect=lambda x: None) @patch("torch.ops.vllm.maybe_prefetch_mlp_down_proj", side_effect=lambda x: None) @@ -94,7 +91,6 @@ def test_SiluAndMul_forward_310p( mock_maybe_prefetch_mlp_down_proj, mock_maybe_wait_prefetch_done, mock_silu, - device_type, dummy_tensor, default_vllm_config, ): diff --git a/tests/ut/ops/test_layernorm.py b/tests/ut/ops/test_layernorm.py index 1ec7043bf9e..a86c6736d70 100644 --- a/tests/ut/ops/test_layernorm.py +++ b/tests/ut/ops/test_layernorm.py @@ -42,12 +42,11 @@ def default_vllm_config(): @pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.") @pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)]) -@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) @patch("torch_npu.npu_add_rms_norm", side_effect=mock_add_rms_norm) @patch("torch.ops._C_ascend.npu_add_rms_norm_bias", side_effect=mock_add_rms_norm_bias) def test_RMSNorm_forward( - mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config + mock_add_rms_norm_bias, mock_add_rmsnorm, mock_rmsnorm, residual, dummy_tensor, default_vllm_config ): layer = RMSNorm(hidden_size=8, eps=1e-05) if residual is not None: @@ -67,10 +66,9 @@ def test_RMSNorm_forward( @pytest.mark.skipif(not is_310p_hw(), reason="310P device unittest case.") @pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float16)]) -@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType._310P) @patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm) def test_RMSNorm_forward_310p( - mock_rmsnorm, device_type, residual, dummy_tensor, default_vllm_config + mock_rmsnorm, residual, dummy_tensor, default_vllm_config ): layer = RMSNorm(hidden_size=8, eps=1e-05) if residual is not None: