@@ -90,8 +90,17 @@ def __init__(
         logger.info("Using default chat sampling params from %s: %s",
                     source, self.default_sampling_params)

-        # False by default.
+        # If False (default), the "store" option is (silently) ignored and the
+        # response is not stored. If True, the response is stored in memory.
+        # NOTE(woosuk): This may not be intuitive for users, as the default
+        # behavior in OpenAI's Responses API is to store the response, but
+        # vLLM's default behavior is not.
         self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
+        if self.enable_store:
+            logger.warning_once(
+                "`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may "
+                "cause a memory leak since we never remove responses from "
+                "the store.")
         # HACK(woosuk): This is a hack. We should use a better store.
         # FIXME: If enable_store=True, this may cause a memory leak since we
         # never remove responses from the store.
@@ -121,9 +130,25 @@ async def create_responses(
         if self.engine_client.errored:
             raise self.engine_client.dead_error

-        # If store is not enabled, return an error.
         if request.store and not self.enable_store:
-            return self._make_store_not_supported_error()
+            if request.background:
+                return self.create_error_response(
+                    err_type="invalid_request_error",
+                    message=(
+                        "This vLLM engine does not support `store=True` and "
+                        "therefore does not support the background mode. To "
+                        "enable these features, set the environment variable "
+                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                        "the vLLM server."),
+                    status_code=HTTPStatus.BAD_REQUEST,
+                )
+            # Disable the store option.
+            # NOTE(woosuk): Although returning an error is possible, we opted
+            # to implicitly disable store and process the request anyway, as
+            # we assume most users do not intend to actually store the response
+            # (i.e., their request's `store=True` just because it's the default
+            # value).
+            request.store = False

         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
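For reference, here is a rough client-side sketch of the behavior this hunk implements, using the OpenAI Python SDK against an OpenAI-compatible vLLM server; the base URL, API key, and model name are placeholders, and the caught exception type is an assumption about how the 400 surfaces in the SDK.

from openai import OpenAI

# Placeholder endpoint and model; adjust to your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Without VLLM_ENABLE_RESPONSES_API_STORE=1 on the server, `store=True`
# (the OpenAI default) is silently downgraded and the request still runs.
resp = client.responses.create(
    model="my-model",
    input="Hello!",
    store=True,
)
print(resp.output_text)

# With `background=True` (and `store=True`, the default), the server cannot
# silently downgrade, so it rejects the request with a 400
# invalid_request_error instead.
try:
    client.responses.create(
        model="my-model",
        input="Hello!",
        store=True,
        background=True,
    )
except Exception as exc:  # openai.BadRequestError in practice
    print("Rejected:", exc)

# Launching the server with VLLM_ENABLE_RESPONSES_API_STORE=1 (as the error
# message suggests) enables both storing responses and background mode.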