Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions release/llm_tests/serve/test_llm_serve_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest
import sys

from ray.llm._internal.serve.deployments.llm.vllm.vllm_loggers import (
RayPrometheusStatLogger,
)
from vllm import AsyncEngineArgs

from vllm.v1.engine.async_llm import AsyncLLM
from vllm.sampling_params import SamplingParams


@pytest.mark.asyncio(scope="function")
async def test_engine_metrics():
    """
    Smoke-test AsyncLLM with the Ray-specific Prometheus stat logger
    attached: build the engine with stats enabled, run two tiny
    generations, and drain their output streams.

    The main point is that the derived ``RayPrometheusStatLogger`` class
    can be instantiated by the engine without error.
    """
    prompts = ["What is the capital of France?", "What is 2+2?"]

    args = AsyncEngineArgs(
        model="Qwen/Qwen2.5-0.5B-Instruct",
        dtype="auto",
        # Stats must be enabled for the stat logger to be exercised.
        disable_log_stats=False,
    )

    llm = AsyncLLM.from_engine_args(args, stat_loggers=[RayPrometheusStatLogger])

    for idx, prompt in enumerate(prompts):
        stream = llm.generate(
            request_id=f"request-id-{idx}",
            prompt=prompt,
            sampling_params=SamplingParams(max_tokens=10),
        )
        # Consume the async stream; we only care that generation completes.
        async for _ in stream:
            pass


# Allow running this file directly (e.g. `python test_llm_serve_integration.py`);
# exit with pytest's return code so CI treats test failures as a nonzero exit.
if __name__ == "__main__":
    sys.exit(pytest.main(["-v", __file__]))
21 changes: 21 additions & 0 deletions release/release_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4260,6 +4260,27 @@
long_running: false
script: pytest -vs test_llm_serve_correctness.py

- name: llm_serve_integration
frequency: nightly
python: "3.11"
group: llm-serve
team: llm
working_dir: llm_tests/serve

cluster:
byod:
type: llm-cu124
runtime_env:
- VLLM_USE_V1=1
cluster_compute: llm_g5-4xlarge.yaml
# NOTE: Important for getting the correct secrets
cloud_id: cld_wy5a6nhazplvu32526ams61d98
project_id: prj_lhlrf1u5yv8qz9qg3xzw8fkiiq
run:
timeout: 3600
long_running: false
script: pytest -vs test_llm_serve_integration.py


##############
# LLM Batch
Expand Down