Commit 9af654c

[Responses API] Ignore store=True and process the request by default (#22185)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent a5fff3b commit 9af654c

2 files changed: +30 -4 lines changed

vllm/entrypoints/openai/serving_responses.py

Lines changed: 28 additions & 3 deletions
@@ -90,8 +90,17 @@ def __init__(
             logger.info("Using default chat sampling params from %s: %s",
                         source, self.default_sampling_params)
 
-        # False by default.
+        # If False (default), the "store" option is (silently) ignored and the
+        # response is not stored. If True, the response is stored in memory.
+        # NOTE(woosuk): This may not be intuitive for users, as the default
+        # behavior in OpenAI's Responses API is to store the response, but
+        # vLLM's default behavior is not.
         self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
+        if self.enable_store:
+            logger.warning_once(
+                "`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may "
+                "cause a memory leak since we never remove responses from "
+                "the store.")
         # HACK(woosuk): This is a hack. We should use a better store.
         # FIXME: If enable_store=True, this may cause a memory leak since we
         # never remove responses from the store.
@@ -121,9 +130,25 @@ async def create_responses(
         if self.engine_client.errored:
             raise self.engine_client.dead_error
 
-        # If store is not enabled, return an error.
         if request.store and not self.enable_store:
-            return self._make_store_not_supported_error()
+            if request.background:
+                return self.create_error_response(
+                    err_type="invalid_request_error",
+                    message=(
+                        "This vLLM engine does not support `store=True` and "
+                        "therefore does not support the background mode. To "
+                        "enable these features, set the environment variable "
+                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                        "the vLLM server."),
+                    status_code=HTTPStatus.BAD_REQUEST,
+                )
+            # Disable the store option.
+            # NOTE(woosuk): Although returning an error is possible, we opted
+            # to implicitly disable store and process the request anyway, as
+            # we assume most users do not intend to actually store the response
+            # (i.e., their request's `store=True` just because it's the default
+            # value).
+            request.store = False
 
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
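
For context, a minimal client-side sketch (not part of this commit) of what the new behavior means against a vLLM server launched without `VLLM_ENABLE_RESPONSES_API_STORE`. It uses the OpenAI Python SDK's Responses API; the base URL, API key, and model name are placeholders for whatever the server is actually serving.

from openai import OpenAI, BadRequestError

# Assumed: a vLLM OpenAI-compatible server running locally with store support
# disabled (the default, i.e. VLLM_ENABLE_RESPONSES_API_STORE is unset).
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# `store` defaults to True in the Responses API. After this commit the server
# silently forces request.store = False and processes the request anyway,
# instead of returning a "store not supported" error.
resp = client.responses.create(model="my-model", input="Say hi")
print(resp.output_text)

# Background mode still needs a real response store, so this request is
# expected to fail with an invalid_request_error (HTTP 400) on such a server.
try:
    client.responses.create(model="my-model", input="Say hi", background=True)
except BadRequestError as exc:
    print("Rejected as expected:", exc)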

vllm/envs.py

Lines changed: 2 additions & 1 deletion
@@ -1060,7 +1060,8 @@ def get_vllm_port() -> Optional[int]:
 
 # Enables support for the "store" option in the OpenAI Responses API.
 # When set to 1, vLLM's OpenAI server will retain the input and output
-# messages for those requests in memory. By default, this is disabled (0).
+# messages for those requests in memory. By default, this is disabled (0),
+# and the "store" option is ignored.
 # NOTE/WARNING:
 # 1. Messages are kept in memory only (not persisted to disk) and will be
 #    lost when the vLLM server shuts down.
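
Conversely, a hedged sketch of the opt-in path: when the server is launched with the flag set, `store=True` is honored, a response can be fetched again by its ID, and background mode becomes available. The launch command, model name, and addresses are placeholders, and since the store is in-memory only, anything kept this way is lost when the server shuts down.

from openai import OpenAI

# Assumed launch (shell, shown here only as a comment):
#   VLLM_ENABLE_RESPONSES_API_STORE=1 vllm serve my-model
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# With the store enabled, store=True is honored: the response is kept in
# server memory and can be fetched again later by its ID.
resp = client.responses.create(model="my-model", input="Hello", store=True)
fetched = client.responses.retrieve(resp.id)
print(fetched.status, fetched.output_text)

# Background mode also becomes available in this configuration.
bg = client.responses.create(model="my-model", input="Hello", background=True)
print(bg.id, bg.status)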
