Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/_unit_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
SOC_VERSION: ascend910b1
MAX_JOBS: 4
COMPILE_CUSTOM_KERNELS: 0
steps:
- name: Install packages
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_test_light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-amd64-cpu-8-hk
image: quay.nju.edu.cn/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
image: quay.nju.edu.cn/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11
type: pr

e2e-light:
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,8 @@ def configure(self, ext: CMakeExtension) -> None:
)

def build_extensions(self) -> None:
if not envs.COMPILE_CUSTOM_KERNELS:
return
# Ensure that CMake is present and working
try:
subprocess.check_output(["cmake", "--version"])
Expand Down Expand Up @@ -423,7 +425,9 @@ def run(self):
# only checks out the commit. In this case, we set a dummy version.
VERSION = "0.0.0"

ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]
ext_modules = []
if envs.COMPILE_CUSTOM_KERNELS:
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]


def get_path(*filepath) -> str:
Expand Down
3 changes: 2 additions & 1 deletion tests/ut/ops/test_layernorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def default_vllm_config():
with set_current_vllm_config(mock_config):
yield mock_config


@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
@pytest.mark.skipif(is_310p_hw(), reason="non_310P device unittest case.")
@pytest.mark.parametrize("residual", [None, torch.randn(4, 8, dtype=torch.float32)])
@patch("torch_npu.npu_rms_norm", side_effect=mock_rms_norm)
Expand Down
23 changes: 23 additions & 0 deletions tests/ut/ops/test_token_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from unittest.mock import MagicMock, PropertyMock, patch

import pytest
import torch

from tests.ut.base import TestBase
Expand Down Expand Up @@ -180,6 +181,8 @@ def tearDown(self):
self.patcher_npu_moe_init_routing_custom.stop()
self.patcher_npu_moe_token_unpermute.stop()

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_without_expert_map(self):
hidden_states = torch.randn(3, 128)
topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
Expand All @@ -194,6 +197,8 @@ def test_token_dispatch_without_expert_map(self):

self.assertEqual(results.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_with_expert_map(self):
self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3])
hidden_states = torch.randn(3, 128)
Expand All @@ -209,6 +214,8 @@ def test_token_dispatch_with_expert_map(self):

self.assertEqual(results.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_without_quant(self):
kwargs = {
"apply_router_weight_on_input": False,
Expand All @@ -229,6 +236,8 @@ def test_token_dispatch_without_quant(self):

self.assertEqual(results.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_with_quant(self):
kwargs = {
"apply_router_weight_on_input": False,
Expand All @@ -254,6 +263,8 @@ def test_token_dispatch_with_quant(self):
self.assertIsNotNone(results.dynamic_scale)
self.assertEqual(results.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_combine_with_expert_map(self):
hidden_states = torch.randn(6, 128)
context_metadata = {
Expand All @@ -265,6 +276,8 @@ def test_token_combine_with_expert_map(self):
hidden_states, context_metadata).routed_out
self.assertEqual(final_hidden_states.shape, (6, 128))

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_combine_without_expert_map(self):
hidden_states = torch.randn(6, 128)
context_metadata = {
Expand All @@ -277,6 +290,8 @@ def test_token_combine_without_expert_map(self):
self.mock_npu_moe_token_unpermute.assert_called_once()
self.assertEqual(final_hidden_states.shape, (6, 128))

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_with_router_weight(self):
self.dispatcher.apply_router_weight_on_input = True
hidden_states = torch.randn(3, 128)
Expand Down Expand Up @@ -381,6 +396,8 @@ def setUp(self):
num_local_experts=2,
with_quant=False)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch(self):
hidden_states = torch.randn(8, 16)
topk_weights = torch.rand(8, 4)
Expand All @@ -400,6 +417,8 @@ def test_token_dispatch(self):
self.assertIsNotNone(result.group_list)
self.assertEqual(result.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_combine(self):
hidden_states = torch.randn(16, 16)
context_metadata = {
Expand All @@ -419,6 +438,8 @@ def test_token_combine(self):
self.assertIsNotNone(output)
self.assertEqual(output.routed_out.shape, (8, 16))

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_with_quant(self):
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
num_experts=4,
Expand All @@ -444,6 +465,8 @@ def test_token_dispatch_with_quant(self):
self.assertIsNotNone(result.dynamic_scale)
self.assertEqual(result.group_list_type, 1)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_token_dispatch_with_quant_no_active_tokens(self):
self.dispatcher = TokenDispatcherWithAll2AllV(top_k=2,
num_experts=4,
Expand Down
3 changes: 3 additions & 0 deletions tests/ut/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from threading import Lock
from unittest import mock

import pytest
import torch
from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
VllmConfig)
Expand Down Expand Up @@ -104,6 +105,8 @@ def test_aligned_16(self):
output_tensor = utils.aligned_16(input_tensor)
self.assertEqual(output_tensor.shape[0], 32)

@pytest.mark.skip(
"Skip as register_kernels has NPU SocName checking in CANN 8.5.0.")
Comment thread
leo-pony marked this conversation as resolved.
def test_enable_custom_op(self):
result = utils.enable_custom_op()
self.assertTrue(result)
Expand Down
6 changes: 6 additions & 0 deletions vllm_ascend/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
# The build type of the package. It can be one of the following values:
# Release, Debug, RelWithDebugInfo. If not set, the default value is Release.
"CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"),
# Whether to compile custom kernels. The value is parsed as an integer:
# 0 disables compilation, any non-zero integer enables it. If not set, the
# default value is 1 (enabled). Non-integer values such as "True"/"False"
# are not accepted by the int() conversion.
# This configuration option should only be set to 0 when running UT
# scenarios in an environment without an NPU. Do not set it to 0 in
# other scenarios.
"COMPILE_CUSTOM_KERNELS": lambda: bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "1"))),
# The CXX compiler used for compiling the package. If not set, the default
# value is None, which means the system default CXX compiler will be used.
"CXX_COMPILER": lambda: os.getenv("CXX_COMPILER", None),
Expand Down
6 changes: 6 additions & 0 deletions vllm_ascend/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ def __init__(
# Additional parameters for compatibility with vllm
**kwargs):
"""Initialize the worker for Ascend."""
if not envs_ascend.COMPILE_CUSTOM_KERNELS:
logger.warning(
"COMPILE_CUSTOM_KERNELS is set to False. "
"In most scenarios, without custom kernels, vllm-ascend will not function correctly."
)

# register patch for vllm
from vllm_ascend.utils import adapt_patch
adapt_patch()
Expand Down
Loading