Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions vllm/v1/sample/logits_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,14 +335,19 @@ def update_state(self, batch_update: Optional[BatchUpdate]):
if not batch_update:
return

needs_update: bool = False
# Process added requests.
needs_update = bool(batch_update.added)
for index, params, _ in batch_update.added:
if isinstance(params, SamplingParams) and (lb :=
params.logit_bias):
self.biases[index] = lb
needs_update = True
else:
self.biases.pop(index, None)
# Drop biases metadata at batch index
if self.biases.pop(index, None) is not None:
# If a new request replaces an old request which
# specified biases, we should update processor tensors
needs_update = True

if self.biases:
# Process removed requests.
Expand Down Expand Up @@ -419,17 +424,20 @@ def update_state(self, batch_update: Optional[BatchUpdate]):

if batch_update:
# Process added requests.
needs_update |= bool(batch_update.added)
for index, params, output_tok_ids in batch_update.added:
if (isinstance(params, SamplingParams)
and (min_tokens := params.min_tokens)
and len(output_tok_ids) < min_tokens):
# Replace request metadata at batch index
self.min_toks[index] = (min_tokens, output_tok_ids,
params.all_stop_token_ids)
needs_update = True
else:
# Drop request metadata at batch index
self.min_toks.pop(index, None)
# Drop min_toks metadata at batch index
if self.min_toks.pop(index, None) is not None:
# If a new request replaces an old request which
# specified min_toks, we should update processor tensors
needs_update = True

if self.min_toks:
# Process removed requests.
Expand Down