diff --git a/release/llm_tests/serve/test_llm_serve_integration.py b/release/llm_tests/serve/test_llm_serve_integration.py
new file mode 100644
index 000000000000..b4abdf72d342
--- /dev/null
+++ b/release/llm_tests/serve/test_llm_serve_integration.py
@@ -0,0 +1,43 @@
+import pytest
+import sys
+
+from ray.llm._internal.serve.deployments.llm.vllm.vllm_loggers import (
+    RayPrometheusStatLogger,
+)
+from vllm import AsyncEngineArgs
+
+from vllm.v1.engine.async_llm import AsyncLLM
+from vllm.sampling_params import SamplingParams
+
+
+@pytest.mark.asyncio(scope="function")
+async def test_engine_metrics():
+    """
+    Test that the stat logger can be created successfully.
+    Keeping this test small to focus on instantiating the
+    derived class correctly.
+    """
+
+    engine_args = AsyncEngineArgs(
+        model="Qwen/Qwen2.5-0.5B-Instruct",
+        dtype="auto",
+        disable_log_stats=False,
+    )
+
+    engine = AsyncLLM.from_engine_args(
+        engine_args, stat_loggers=[RayPrometheusStatLogger]
+    )
+
+    for i, prompt in enumerate(["What is the capital of France?", "What is 2+2?"]):
+        results = engine.generate(
+            request_id=f"request-id-{i}",
+            prompt=prompt,
+            sampling_params=SamplingParams(max_tokens=10),
+        )
+
+        async for _ in results:
+            pass
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-v", __file__]))
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index 9ce32ad723e8..26e2ced44938 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -4260,6 +4260,27 @@
     long_running: false
     script: pytest -vs test_llm_serve_correctness.py
 
+- name: llm_serve_integration
+  frequency: nightly
+  python: "3.11"
+  group: llm-serve
+  team: llm
+  working_dir: llm_tests/serve
+
+  cluster:
+    byod:
+      type: llm-cu124
+      runtime_env:
+        - VLLM_USE_V1=1
+    cluster_compute: llm_g5-4xlarge.yaml
+  # NOTE: Important for getting the correct secrets
+  cloud_id: cld_wy5a6nhazplvu32526ams61d98
+  project_id: prj_lhlrf1u5yv8qz9qg3xzw8fkiiq
+  run:
+    timeout: 3600
+    long_running: false
+    script: pytest -vs test_llm_serve_integration.py
+
 ##############
 # LLM Batch