Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions release/llm_tests/serve/test_llm_serve_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest
import sys

from ray.llm._internal.serve.deployments.llm.vllm.vllm_loggers import (
RayPrometheusStatLogger,
)
from vllm import AsyncEngineArgs

from vllm.v1.engine.async_llm import AsyncLLM
from vllm.sampling_params import SamplingParams


@pytest.mark.asyncio(scope="function")
async def test_engine_metrics():
    """
    Smoke-test AsyncLLM with the Ray-specific Prometheus stat logger
    attached: build the engine with stats enabled, run two tiny
    generations, and drain their output streams.

    The main point is that the derived ``RayPrometheusStatLogger`` class
    can be instantiated by the engine without error.
    """
    prompts = ["What is the capital of France?", "What is 2+2?"]

    args = AsyncEngineArgs(
        model="Qwen/Qwen2.5-0.5B-Instruct",
        dtype="auto",
        # Stats must be enabled for the stat logger to be exercised.
        disable_log_stats=False,
    )

    llm = AsyncLLM.from_engine_args(args, stat_loggers=[RayPrometheusStatLogger])

    for idx, prompt in enumerate(prompts):
        stream = llm.generate(
            request_id=f"request-id-{idx}",
            prompt=prompt,
            sampling_params=SamplingParams(max_tokens=10),
        )
        # Consume the async stream; we only care that generation completes.
        async for _ in stream:
            pass


# Allow running this file directly (e.g. `python test_llm_serve_integration.py`);
# exit with pytest's return code so CI treats test failures as a nonzero exit.
if __name__ == "__main__":
    sys.exit(pytest.main(["-v", __file__]))
21 changes: 21 additions & 0 deletions release/release_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4260,6 +4260,27 @@
long_running: false
script: pytest -vs test_llm_serve_correctness.py

- name: llm_serve_integration
frequency: nightly
python: "3.11"
group: llm-serve
team: llm
working_dir: llm_tests/serve

cluster:
byod:
type: llm-cu124
runtime_env:
- VLLM_USE_V1=1
cluster_compute: llm_g5-4xlarge.yaml
# NOTE: Important for getting the correct secrets
cloud_id: cld_wy5a6nhazplvu32526ams61d98
project_id: prj_lhlrf1u5yv8qz9qg3xzw8fkiiq
run:
timeout: 3600
long_running: false
script: pytest -vs test_llm_serve_integration.py


##############
# LLM Batch
Expand Down