diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index dca327cc5d07..579b3895bb71 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -160,20 +160,7 @@ def __init__(self, tokenizer: PreTrainedTokenizerFast, skip_special_tokens=sampling_params.skip_special_tokens) self.tokenizer: Tokenizer = tokenizer._tokenizer - - # Find a safe place to start. - prompt_suffix = request.prompt_token_ids - prompt_len = len(prompt_suffix) - if prompt_len > 4: - for i in range(4, min(prompt_len + 1, 24)): - suffix = request.prompt_token_ids[-i:] - if '�' not in self.tokenizer.decode(suffix): - prompt_suffix = suffix - break - - # Prime the stream. - for tid in prompt_suffix: - self.stream.step(self.tokenizer, tid) + self.stream.step(self.tokenizer, request.prompt_token_ids) self.spaces_between_special_tokens = ( sampling_params.skip_special_tokens