6 changes: 1 addition & 5 deletions tests/ut/batch_invariant/test_batch_invariant.py
@@ -127,6 +127,7 @@ def test_init_batch_invariance(self, batch_invariant_enabled, has_backend, expec
         """Test init_batch_invariance under different conditions"""
         # Mock dependencies
         import vllm.envs as envs
+
         envs.VLLM_BATCH_INVARIANT = batch_invariant_enabled
         batch_invariant.HAS_TRITON = has_backend
         batch_invariant.HAS_ASCENDC_BATCH_INVARIANT = has_backend
@@ -151,11 +152,8 @@ def test_init_batch_invariance(self, batch_invariant_enabled, has_backend, expec
     def test_add_rms_norm(self, mock_torch_npu):
         """Test add_rms_norm function"""
         # Mock dependencies
-        mock_torch = batch_invariant.torch

         # Create mock tensors
-        batch_size = 2
-        hidden_size = 4
         x = MagicMock(spec=torch.Tensor)
         residual = MagicMock(spec=torch.Tensor)
         weight = MagicMock(spec=torch.Tensor)
@@ -187,8 +185,6 @@ def test_add_rms_norm(self, mock_torch_npu):
     def test_add_rms_norm_consistency(self, mock_torch_npu):
         """Test that add_rms_norm produces the same output as torch_npu.npu_add_rms_norm"""
         # Create mock tensors
-        batch_size = 2
-        hidden_size = 4
         x = MagicMock(spec=torch.Tensor)
         residual = MagicMock(spec=torch.Tensor)
         weight = MagicMock(spec=torch.Tensor)
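
Both tests above construct their tensor stand-ins with MagicMock(spec=torch.Tensor). For readers unfamiliar with spec, here is a minimal, self-contained illustration of what it buys — standard unittest.mock behavior, not vllm-ascend code:

```python
# `spec` limits the mock's attribute surface to torch.Tensor's API,
# so a typo fails fast instead of silently returning another mock.
from unittest.mock import MagicMock

import torch

x = MagicMock(spec=torch.Tensor)
x.mul(2)  # fine: torch.Tensor defines .mul

try:
    x.mull(2)  # not part of the torch.Tensor spec
except AttributeError as exc:
    print(exc)  # Mock object has no attribute 'mull'
```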
495 changes: 267 additions & 228 deletions tests/ut/compilation/test_acl_graph.py

Large diffs are not rendered by default.

17 changes: 5 additions & 12 deletions tests/ut/compilation/test_npugraph_ex_utils_check.py
@@ -13,8 +13,7 @@
 # This file is a part of the vllm-ascend project.
 #

-from vllm_ascend.compilation.passes.utils.npugraph_ex_utils_check import \
-    extra_stream_scope_check
+from vllm_ascend.compilation.passes.utils.npugraph_ex_utils_check import extra_stream_scope_check


 def test_extra_stream_scope_check_logic():
@@ -24,31 +23,25 @@ def test_extra_stream_scope_check_logic():
     """

     class MockNode:
-
         def __init__(self, stream_label=None):
             self.op = "call_function"
             self.meta = {"stream_label": stream_label}

     class MockMatch:
-
         def __init__(self, nodes):
             self.nodes = nodes

     # Test 1: all default → OK
-    assert extra_stream_scope_check(
-        MockMatch([MockNode(None), MockNode(None)])) is True
+    assert extra_stream_scope_check(MockMatch([MockNode(None), MockNode(None)])) is True

     # Test 2: same non-default → OK
-    assert extra_stream_scope_check(
-        MockMatch([MockNode("s1"), MockNode("s1")])) is True
+    assert extra_stream_scope_check(MockMatch([MockNode("s1"), MockNode("s1")])) is True

     # Test 3: mixed non-default → FAIL
-    assert extra_stream_scope_check(
-        MockMatch([MockNode("s1"), MockNode("s2")])) is False
+    assert extra_stream_scope_check(MockMatch([MockNode("s1"), MockNode("s2")])) is False

     # Test 4: default + non-default → FAIL
-    assert extra_stream_scope_check(
-        MockMatch([MockNode(None), MockNode("s1")])) is False
+    assert extra_stream_scope_check(MockMatch([MockNode(None), MockNode("s1")])) is False

     # Test 5: empty → OK
     assert extra_stream_scope_check(MockMatch([])) is True
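
Taken together, the five cases pin down the invariant: a fused-pattern match must not straddle stream scopes, i.e. every node in the match carries the same stream_label (all None — the default stream — also counts as uniform, and an empty match passes trivially). A minimal sketch of that predicate, inferred from the test rather than taken from the actual vllm-ascend implementation:

```python
# Sketch of the checked invariant, assuming nodes expose an optional
# "stream_label" in node.meta exactly as MockNode does above.
def extra_stream_scope_check_sketch(match) -> bool:
    # Collect distinct labels; zero or one distinct label is consistent.
    labels = {node.meta.get("stream_label") for node in match.nodes}
    return len(labels) <= 1
```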
10 changes: 5 additions & 5 deletions tests/ut/conftest.py
@@ -20,17 +20,17 @@

 triton_runtime = MagicMock()
 triton_runtime.driver.active.utils.get_device_properties.return_value = {
-    'num_aic': 8,
-    'num_vectorcore': 8,
+    "num_aic": 8,
+    "num_vectorcore": 8,
 }
-sys.modules['triton.runtime'] = triton_runtime
+sys.modules["triton.runtime"] = triton_runtime

 from vllm_ascend.utils import adapt_patch  # noqa E402
 from vllm_ascend.utils import register_ascend_customop  # noqa E402

 # triton and torch_npu is not available in the environment, so we need to mock them
-sys.modules['torch_npu'].npu.current_device = MagicMock(return_value=0)
-sys.modules['torch_npu._inductor'] = MagicMock()
+sys.modules["torch_npu"].npu.current_device = MagicMock(return_value=0)
+sys.modules["torch_npu._inductor"] = MagicMock()

 adapt_patch()
 adapt_patch(True)
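
The conftest leans on a standard trick: installing MagicMock stand-ins in sys.modules before anything imports the real packages, so vllm-ascend modules that do `import torch_npu` can load on machines without Ascend hardware. A self-contained illustration of the mechanism:

```python
import sys
from unittest.mock import MagicMock

# Install the stub *before* the first import: Python caches modules in
# sys.modules, so any later `import torch_npu` resolves to this mock.
sys.modules["torch_npu"] = MagicMock()

import torch_npu  # returns the MagicMock installed above

torch_npu.npu.current_device = MagicMock(return_value=0)
assert torch_npu.npu.current_device() == 0
```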
60 changes: 19 additions & 41 deletions tests/ut/core/test_profiling_chunk.py
@@ -14,30 +14,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import Any, Dict, Optional
 from unittest.mock import MagicMock, patch

 import torch
-from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
-                         SchedulerConfig, VllmConfig)
+from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig
 from vllm.sampling_params import SamplingParams
 from vllm.utils.hashing import sha256
-from vllm.v1.core.kv_cache_utils import (get_request_block_hasher,
-                                         init_none_hash)
-from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
-                                        KVCacheGroupSpec)
+from vllm.v1.core.kv_cache_utils import get_request_block_hasher, init_none_hash
+from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.v1.request import Request
 from vllm.v1.structured_output import StructuredOutputManager

 from tests.ut.base import TestBase
-from vllm_ascend.ascend_config import (ProfilingChunkConfig,
-                                       clear_ascend_config, init_ascend_config)
-from vllm_ascend.core.profiling_chunk_predictor import (ChunkSizePredictor,
-                                                        ProfilingChunkManager)
-from vllm_ascend.core.scheduler_profiling_chunk import \
-    ProfilingChunkScheduler
-
+from vllm_ascend.ascend_config import ProfilingChunkConfig, clear_ascend_config, init_ascend_config
+from vllm_ascend.core.profiling_chunk_predictor import ChunkSizePredictor, ProfilingChunkManager
+from vllm_ascend.core.scheduler_profiling_chunk import ProfilingChunkScheduler

 MODEL = "Qwen/Qwen3-0.6B"
 BLOCK_SIZE = 16
@@ -63,10 +55,7 @@ def create_requests(num_requests, num_tokens=10, max_tokens=16):

 def make_output(scheduler):
     req_ids = [req.request_id for req in scheduler.running]
-    req_id_to_index = {
-        req.request_id: i
-        for i, req in enumerate(scheduler.running)
-    }
+    req_id_to_index = {req.request_id: i for i, req in enumerate(scheduler.running)}
     sampled_token_ids = [[1000]] * len(scheduler.running)
     return ModelRunnerOutput(
         req_ids=req_ids,
@@ -84,7 +73,6 @@ def make_output(scheduler):


 class TestProfilingChunkConfig(TestBase):
-
     def test_default_values(self):
         cfg = ProfilingChunkConfig()
         self.assertFalse(cfg.enabled)
@@ -144,10 +132,9 @@ def test_disabled_without_pp_ok(self, _mock):


 class TestChunkSizePredictor(TestBase):
-
     @staticmethod
     def _make_data(a, b, c, seq_lens):
-        return [a * l * l + b * l + c for l in seq_lens]
+        return [a * seq_len * seq_len + b * seq_len + c for seq_len in seq_lens]

     def test_fit_and_predict(self):
         predictor = ChunkSizePredictor()
@@ -158,8 +145,7 @@ def test_fit_and_predict(self):
         predictor.set_target_latency(8192)
         predictor.is_ready = True

-        chunk = predictor.predict(
-            num_computed_tokens=0, base_chunk_size=8192, page_size=128)
+        chunk = predictor.predict(num_computed_tokens=0, base_chunk_size=8192, page_size=128)
         self.assertIsNotNone(chunk)
         self.assertEqual(chunk % 128, 0)

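_make_data and the latency lists further down synthesize a quadratic cost model, latency ≈ a·l² + b·l + c in the sequence length l, and the test then expects a chunk size that is a whole number of 128-token pages. A hedged numpy sketch of that fit-then-invert idea — illustrative only, not the actual ChunkSizePredictor, and predict_chunk is a made-up helper name:

```python
import numpy as np

# Fit the quadratic latency model to the same synthetic data the tests use.
seq_lens = np.arange(64, 8256, 128, dtype=float)
latencies = 1e-6 * seq_lens**2 + 0.01 * seq_lens + 1.0
a, b, c = np.polyfit(seq_lens, latencies, deg=2)  # coefficients, highest degree first


def predict_chunk(target_latency: float, page_size: int = 128) -> int:
    # Invert the model: largest l with a*l^2 + b*l + c <= target_latency,
    # rounded down to a page multiple (matching the chunk % 128 == 0 assert).
    roots = np.roots([a, b, c - target_latency])
    l_max = max(r.real for r in roots if r.real > 0)
    return max(page_size, int(l_max) // page_size * page_size)


print(predict_chunk(target_latency=50.0))  # ~3584 for this synthetic model
```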
@@ -204,7 +190,6 @@ def test_fit_chunk_and_predict_with_history(self):


 class TestProfilingChunkManager(TestBase):
-
     def test_not_ready_before_profiling(self):
         mgr = ProfilingChunkManager(base_chunk_size=8192, page_size=128)
         self.assertFalse(mgr.is_ready)
@@ -213,7 +198,7 @@ def test_not_ready_before_profiling(self):
     def test_run_profiling_success(self):
         mgr = ProfilingChunkManager(base_chunk_size=8192, page_size=128)
         seq_lens = list(range(64, 8256, 128))
-        latencies = [1e-6 * l * l + 0.01 * l + 1.0 for l in seq_lens]
+        latencies = [1e-6 * seq_len * seq_len + 0.01 * seq_len + 1.0 for seq_len in seq_lens]
         self.assertTrue(mgr.predictor.fit(seq_lens, latencies))
         mgr.predictor.set_target_latency(8192)
         mgr.predictor.is_ready = True
@@ -233,15 +218,14 @@ def test_run_profiling_all_fail(self):
     def test_record_batch_refines_model(self):
         mgr = ProfilingChunkManager(base_chunk_size=8192, page_size=128)
         seq_lens = list(range(64, 8256, 128))
-        latencies = [1e-6 * l * l + 0.01 * l + 1.0 for l in seq_lens]
+        latencies = [1e-6 * seq_len * seq_len + 0.01 * seq_len + 1.0 for seq_len in seq_lens]
         mgr.predictor.fit(seq_lens, latencies)
         mgr.predictor.set_target_latency(8192)
         mgr.predictor.is_ready = True
         mgr._profiling_done = True

         for i in range(10):
-            mgr.record_batch_execution_time(
-                [(4096 - i * 100, i * 500)], 0.05 + i * 0.01)
+            mgr.record_batch_execution_time([(4096 - i * 100, i * 500)], 0.05 + i * 0.01)
         self.assertGreaterEqual(len(mgr.chunked_fit_data), 10)
         self.assertTrue(mgr.history_ready)

@@ -252,7 +236,6 @@ def test_record_batch_refines_model(self):


 class TestProfilingChunkScheduler(TestBase):
-
     @patch("vllm_ascend.ascend_config.AscendConfig.__init__", MagicMock(return_value=None))
     @patch("vllm_ascend.ascend_config.get_ascend_config")
     @patch("vllm.config.ModelConfig.__post_init__", MagicMock())
@@ -262,8 +245,7 @@ def create_scheduler(self, mock_get_ascend_config):
         profiling_cfg.enabled = True
         profiling_cfg.smooth_factor = 0.8
         profiling_cfg.min_chunk = 256
-        mock_get_ascend_config.return_value = MagicMock(
-            profiling_chunk_config=profiling_cfg)
+        mock_get_ascend_config.return_value = MagicMock(profiling_chunk_config=profiling_cfg)

         mock_hf_config = MagicMock()
         mock_hf_config.model_type = "qwen3"
@@ -295,7 +277,8 @@ def create_scheduler(self, mock_get_ascend_config):
         scheduler_config.chunked_prefill_enabled = True

         cache_config = CacheConfig(
-            block_size=BLOCK_SIZE, gpu_memory_utilization=0.9,
+            block_size=BLOCK_SIZE,
+            gpu_memory_utilization=0.9,
             cache_dtype="auto",
         )

@@ -306,6 +289,7 @@ def create_scheduler(self, mock_get_ascend_config):
         )
         vllm_config.parallel_config.pipeline_parallel_size = 2
         from unittest.mock import PropertyMock
+
         type(model_config).is_encoder_decoder = PropertyMock(return_value=False)
         vllm_config.model_config.hf_config.is_encoder_decoder = False

@@ -314,13 +298,8 @@ def create_scheduler(self, mock_get_ascend_config):
             kv_cache_tensors=[],
             kv_cache_groups=[
                 KVCacheGroupSpec(
-                    ['layer'],
-                    FullAttentionSpec(
-                        block_size=BLOCK_SIZE,
-                        num_kv_heads=1,
-                        head_size=1,
-                        dtype=torch.float32
-                    )
+                    ["layer"],
+                    FullAttentionSpec(block_size=BLOCK_SIZE, num_kv_heads=1, head_size=1, dtype=torch.float32),
                 )
             ],
         )
@@ -408,8 +387,7 @@ def test_schedule_chunked_prefill_running(self):
         mock_executor.collective_rpc.return_value = [10.0]
         scheduler.run_profiling_chunk_init(mock_executor)

-        requests = create_requests(num_requests=1, num_tokens=2000,
-                                   max_tokens=16)
+        requests = create_requests(num_requests=1, num_tokens=2000, max_tokens=16)
         for req in requests:
             scheduler.add_request(req)

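One detail in create_scheduler worth noting: the PropertyMock is assigned on type(model_config), not on the instance. Properties live on the class, so an instance-level assignment would never be consulted. A self-contained illustration of that standard unittest.mock pattern:

```python
from unittest.mock import MagicMock, PropertyMock

model_config = MagicMock()

# Attach the property to the *class* of the mock; attribute access on the
# instance then routes through the PropertyMock.
type(model_config).is_encoder_decoder = PropertyMock(return_value=False)

assert model_config.is_encoder_decoder is False
```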