diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index b53bd71a1cd1..f092a47fe1fc 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -4247,15 +4247,6 @@ def propose_draft_token_ids( self.input_batch.token_ids_cpu, slot_mappings=slot_mappings, ) - if isinstance(self.drafter, NgramProposer): - assert isinstance(sampled_token_ids, list), ( - "sampled_token_ids should be a python list when ngram is used." - ) - draft_token_ids = self.drafter.propose( - sampled_token_ids, - self.input_batch.num_tokens_no_spec, - self.input_batch.token_ids_cpu, - ) elif spec_config.use_ngram_gpu(): assert isinstance(self.drafter, NgramProposerGPU) (