diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py
index 99ecac85e83..66301b642f7 100644
--- a/tensorrt_llm/_torch/pyexecutor/model_engine.py
+++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -888,9 +888,6 @@ def _create_warmup_request(
         available_tokens = kv_cache_manager.get_num_available_tokens(
             self.runtime_draft_len)
         available_blocks = kv_cache_manager.get_num_free_blocks()

-        print(
-            f"available_tokens: {available_tokens}, num_tokens: {num_tokens}, num_gen_requests: {num_gen_requests}"
-        )
         if num_tokens > self.max_num_tokens or num_tokens > available_tokens:
             return None
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index ebc5330433d..d91cefa7fd8 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -376,7 +376,6 @@ accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
 accuracy/test_llm_api_pytorch_multimodal.py::TestLlava_V1_6_Mistral_7B::test_auto_dtype SKIP (https://nvbugs/5707087)
 accuracy/test_llm_api_pytorch_multimodal.py::TestPhi4MMFusedVisionLora::test_auto_dtype SKIP (https://nvbugs/5707087)
 disaggregated/test_disaggregated.py::test_disaggregated_ctxtp2pp2_gentp2pp2[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5705199)
-accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5707359)
 unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_wide_ep[NotEnabled] SKIP (https://nvbugs/5707392)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_auto_dtype_tp2 SKIP (https://nvbugs/5707145)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_fp8_prequantized_tp2 SKIP (https://nvbugs/5707145)
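
Review note: the removed print() was unconditional debug output on the warmup path. If this diagnostic turns out to be worth keeping, a minimal sketch of an alternative, assuming the project's standard logger (tensorrt_llm.logger.logger) and using a hypothetical helper name that is not part of this PR:

from tensorrt_llm.logger import logger


def _log_warmup_capacity(available_tokens: int, num_tokens: int,
                         num_gen_requests: int) -> None:
    # Hypothetical helper: emits the same diagnostic as the deleted print(),
    # but only when the log level is set to debug, so warmup stays quiet
    # in normal runs.
    logger.debug(
        f"available_tokens: {available_tokens}, num_tokens: {num_tokens}, "
        f"num_gen_requests: {num_gen_requests}")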