2 changes: 1 addition & 1 deletion tests/test_batching.py
@@ -790,7 +790,7 @@ def test_multiple_concurrent_requests(self, model_and_tokenizer):
assert len(finished) == len(prompts), f"Only {len(finished)} requests finished"


-@pytest.mark.asyncio
+@pytest.mark.anyio
class TestEngineAsync:
"""Async tests for the engine."""

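The marker swap above (and in the files below) moves the suite from pytest-asyncio to the anyio pytest plugin. The diff itself does not show how the backend is selected; as a point of reference, here is a minimal sketch of the anyio_backend fixture the anyio plugin consults, assuming the project pins it to asyncio in a conftest.py (hypothetical, not part of this PR):

# conftest.py -- hypothetical sketch, not part of this diff
import pytest

@pytest.fixture
def anyio_backend():
    # Tests marked @pytest.mark.anyio run on the backend named here;
    # returning "asyncio" keeps behaviour closest to the previous
    # pytest-asyncio setup. The plugin's built-in fixture already
    # defaults to "asyncio", so this override is optional.
    return "asyncio"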
22 changes: 11 additions & 11 deletions tests/test_batching_deterministic.py
@@ -37,7 +37,7 @@ def sampling_params():
class TestDeterministicSingleRequest:
"""Test single request determinism."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_same_prompt_same_output(self, model_and_tokenizer, sampling_params):
"""Same prompt should produce same output with temp=0."""
from vllm_mlx import AsyncEngineCore, EngineConfig, SchedulerConfig
@@ -68,7 +68,7 @@ def test_same_prompt_same_output(self, model_and_tokenizer, sampling_param
assert len(outputs) == 3
assert outputs[0] == outputs[1] == outputs[2], f"Outputs differ: {outputs}"

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_token_streaming_order(self, model_and_tokenizer, sampling_params):
"""Tokens should stream in order."""
from vllm_mlx import AsyncEngineCore
@@ -94,7 +94,7 @@ async def test_token_streaming_order(self, model_and_tokenizer, sampling_params)
class TestDeterministicConcurrentRequests:
"""Test concurrent request handling with determinism."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_concurrent_same_prompt(self, model_and_tokenizer):
"""Multiple concurrent requests with same prompt should get same output."""
from vllm_mlx import (
@@ -137,7 +137,7 @@ async def get_output(rid):
# All should be the same
assert all(r == results[0] for r in results), f"Outputs differ: {results}"

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_concurrent_different_prompts(self, model_and_tokenizer):
"""Different prompts should get different (but deterministic) outputs."""
from vllm_mlx import (
@@ -191,7 +191,7 @@ async def get_output(rid):
class TestBatchingPerformance:
"""Test that batching improves throughput."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_batched_faster_than_sequential(self, model_and_tokenizer):
"""Batched requests should be faster than sequential."""
from vllm_mlx import (
@@ -274,7 +274,7 @@ async def get_output(rid):
class TestRequestManagement:
"""Test request lifecycle management."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_abort_request(self, model_and_tokenizer):
"""Test aborting a request mid-generation."""
from vllm_mlx import AsyncEngineCore, SamplingParams
@@ -304,7 +304,7 @@ async def test_abort_request(self, model_and_tokenizer):
stats = engine.get_stats()
assert stats["active_requests"] == 0

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_engine_stats(self, model_and_tokenizer):
"""Test engine statistics tracking."""
from vllm_mlx import (
@@ -343,7 +343,7 @@ async def test_engine_stats(self, model_and_tokenizer):
class TestSchedulerPolicy:
"""Test scheduler policies."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_fcfs_ordering(self, model_and_tokenizer):
"""Test that FCFS policy processes requests in order."""
from vllm_mlx import (
@@ -396,7 +396,7 @@ async def track_completion(rid, name):
class TestEdgeCases:
"""Test edge cases and error handling."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_empty_prompt(self, model_and_tokenizer):
"""Test handling of empty prompt."""
from vllm_mlx import AsyncEngineCore, SamplingParams
@@ -414,7 +414,7 @@ async def test_empty_prompt(self, model_and_tokenizer):
assert out.finished
break

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_very_short_max_tokens(self, model_and_tokenizer):
"""Test with max_tokens=1."""
from vllm_mlx import AsyncEngineCore, SamplingParams
@@ -436,7 +436,7 @@ async def test_very_short_max_tokens(self, model_and_tokenizer):
# Should generate exactly 1 token
assert token_count == 1

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_multiple_start_stop(self, model_and_tokenizer):
"""Test starting and stopping engine multiple times."""
from vllm_mlx import AsyncEngineCore, SamplingParams
2 changes: 1 addition & 1 deletion tests/test_continuous_batching.py
@@ -53,7 +53,7 @@ def test_scheduler_config_batching_params(self):
assert config.completion_batch_size == 32


-@pytest.mark.asyncio
+@pytest.mark.anyio
class TestContinuousBatchingIntegration:
"""Integration tests requiring actual model loading."""

12 changes: 4 additions & 8 deletions tests/test_server.py
@@ -629,9 +629,7 @@ def test_verify_api_key_rejects_invalid(self):

# Should raise HTTPException with 401
with pytest.raises(HTTPException) as exc_info:
-asyncio.get_event_loop().run_until_complete(
-    server.verify_api_key(credentials)
-)
+asyncio.run(server.verify_api_key(credentials))

assert exc_info.value.status_code == 401
assert "Invalid API key" in str(exc_info.value.detail)
@@ -657,9 +655,7 @@ def test_verify_api_key_accepts_valid(self):
)

# Should not raise any exception
-result = asyncio.get_event_loop().run_until_complete(
-    server.verify_api_key(credentials)
-)
+result = asyncio.run(server.verify_api_key(credentials))
# verify_api_key returns True on success (no exception raised)
assert result is True or result is None
finally:
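Both API-key tests drive a coroutine from synchronous test code; the change replaces the implicit-event-loop pattern with asyncio.run(), which creates a fresh event loop, runs the coroutine to completion, and closes the loop, whereas asyncio.get_event_loop() emits a DeprecationWarning in recent Python versions when no loop is running. A standalone sketch of the same migration with hypothetical names (not code from this PR):

# Hypothetical example of the same pattern; verify() stands in for an
# async check such as server.verify_api_key(credentials).
import asyncio

async def verify(token: str) -> bool:
    return token == "secret"

# Old style: fetches (or implicitly creates) an event loop and drives it.
# loop = asyncio.get_event_loop()
# result = loop.run_until_complete(verify("secret"))

# New style: one call that owns the loop's full lifecycle.
result = asyncio.run(verify("secret"))
assert result is True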
@@ -716,7 +712,7 @@ def test_rate_limiter_window_cleanup(self):
class TestStreamChatCompletion:
"""Tests for streaming chat completion behavior."""

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_reasoning_stream_emits_structured_tool_calls(self, monkeypatch):
"""Tool markup after </think> should emit tool_calls chunks."""
from vllm_mlx.engine.base import GenerationOutput
@@ -837,7 +833,7 @@ def extract_tool_calls_streaming(
"total_tokens": 10,
}

-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_reasoning_stream_skips_tool_parser_until_markup_appears(
self, monkeypatch
):
2 changes: 1 addition & 1 deletion tests/test_streaming_latency.py
@@ -206,7 +206,7 @@ async def run_benchmark(
print(f"Throughput: {throughput:.1f} tokens/sec")


-@pytest.mark.asyncio
+@pytest.mark.anyio
async def test_output_collector():
"""Unit test for RequestOutputCollector."""
import sys