diff --git a/vllm_omni/worker/gpu_model_runner.py b/vllm_omni/worker/gpu_model_runner.py index 1f678b579fa..5ff62c11b40 100644 --- a/vllm_omni/worker/gpu_model_runner.py +++ b/vllm_omni/worker/gpu_model_runner.py @@ -1241,6 +1241,7 @@ def _preprocess( span_len = int(e) - int(s) # call the custom process function + req_infos["request_id"] = req_id embed_slice = inputs_embeds[s:e] if inputs_embeds is not None else None req_input_ids, req_embeds, update_dict = self.model.preprocess( input_ids=input_ids[s:e], input_embeds=embed_slice, **req_infos