|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
3 | 3 |
|
4 | 4 | import asyncio |
5 | | -import contextlib |
6 | 5 | import random |
7 | | -import time |
8 | 6 | from typing import Callable |
9 | 7 |
|
10 | 8 | import openai |
11 | 9 | import pytest |
12 | 10 | import pytest_asyncio |
13 | | -import requests |
14 | 11 |
|
15 | 12 | from tests.utils import RemoteOpenAIServer |
16 | 13 |
|
@@ -87,54 +84,3 @@ async def get_status_code(**kwargs): |
87 | 84 |
|
88 | 85 | responses = await asyncio.gather(*[get_status_code(**b) for b in bodies]) |
89 | 86 | assert 500 not in responses |
90 | | - |
91 | | - |
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ids=["single completion", "multiple completions", "chat"],
    argnames=["create_func_gen", "content_body"],
    argvalues=[
        (lambda x: x.completions.create, {
            "prompt": " ".join(['A'] * 300_000)
        }),
        (lambda x: x.completions.create, {
            "prompt": [" ".join(['A'] * 300_000)] * 2
        }),
        (lambda x: x.chat.completions.create, {
            "messages": [{
                "role": "user",
                "content": " ".join(['A'] * 300_000)
            }]
        }),
    ],
)
async def test_healthcheck_response_time(
    server: RemoteOpenAIServer,
    client: openai.AsyncOpenAI,
    create_func_gen: Callable,
    content_body: dict,
):
    """Check that ``/health`` stays fast while large requests are in flight.

    The health endpoint is timed once with no load, then again one second
    after ``num_requests`` very large completion/chat requests have been
    scheduled. The loaded response time must be under 0.1 s and under 100x
    the unloaded response time.

    Args:
        server: Running server fixture; supplies the health URL.
        client: Async OpenAI client used to issue the load requests.
        create_func_gen: Maps ``client`` to the ``create`` coroutine function
            under test (completions or chat completions).
        content_body: Request fields specific to the parametrized case.
    """
    num_requests = 50
    # Fail the test instead of hanging forever if the endpoint never answers.
    health_timeout_s = 10

    create_func = create_func_gen(client)
    body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}
    health_url = server.url_for("health")

    def get_response_time(url):
        # NOTE(review): requests.get blocks the event loop while it runs, so
        # the load tasks make no client-side progress during the measurement.
        # Kept synchronous to leave the measured behaviour unchanged; confirm
        # this is intended.
        start_time = time.monotonic()
        res = requests.get(url, timeout=health_timeout_s)
        end_time = time.monotonic()
        assert res.status_code == 200
        return end_time - start_time

    no_load_response_time = get_response_time(health_url)
    tasks = [
        asyncio.create_task(create_func(**body)) for _ in range(num_requests)
    ]
    try:
        await asyncio.sleep(1)  # give the tasks a chance to start running
        load_response_time = get_response_time(health_url)
    finally:
        # Always drain every task, even when the health check above failed.
        # return_exceptions=True makes gather wait for all of them instead of
        # returning on the first error and leaving the rest running.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # API status errors from the load requests are tolerated; anything else
    # is a genuine failure and must surface.
    for result in results:
        if (isinstance(result, BaseException)
                and not isinstance(result, openai.APIStatusError)):
            raise result

    assert load_response_time < 100 * no_load_response_time
    assert load_response_time < 0.1
0 commit comments