From 2b4da3df4ce4af65b15af16bef30ce8975652163 Mon Sep 17 00:00:00 2001 From: Erin Ho <14718778+hchings@users.noreply.github.com> Date: Thu, 17 Jul 2025 18:03:17 +0000 Subject: [PATCH] fix idx Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com> --- tensorrt_llm/_torch/pyexecutor/model_engine.py | 4 ++-- tests/integration/defs/llmapi/test_llm_examples.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py index 998da7ed70c..7043bc445a9 100644 --- a/tensorrt_llm/_torch/pyexecutor/model_engine.py +++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py @@ -2173,7 +2173,7 @@ def _execute_logit_post_processors(self, # Skip as we only need to apply logit processor on the last context request continue - logits_row = logits_tensor[request.py_batch_idx] + logits_row = logits_tensor[idx] # Reshape to align w/ the shape used in the TRT backend, # so the same logit processors can be used across both backends. logits_row = logits_row.view(1, 1, -1) @@ -2186,4 +2186,4 @@ def _execute_logit_post_processors(self, "defined in `tensorrtllm.sampling_params`.") lp(request.py_request_id, logits_row, token_ids, None, None) - logits_tensor[request.py_batch_idx] = logits_row.view(-1) + logits_tensor[idx] = logits_row.view(-1) diff --git a/tests/integration/defs/llmapi/test_llm_examples.py b/tests/integration/defs/llmapi/test_llm_examples.py index c9775d416dc..993372eb540 100644 --- a/tests/integration/defs/llmapi/test_llm_examples.py +++ b/tests/integration/defs/llmapi/test_llm_examples.py @@ -124,7 +124,6 @@ def test_llmapi_example_distributed_tp2(llm_root, engine_dir, llm_venv): "llm_inference_distributed.py") -@pytest.mark.skip(reason="https://nvbugs/5385576") def test_llmapi_example_logits_processor(llm_root, engine_dir, llm_venv): _run_llmapi_example(llm_root, engine_dir, llm_venv, "llm_logits_processor.py")