Skip to content

Commit bc5fe78

Browse files
authored
[chore] Fix KV cache block reuse flag name in quickstart_advanced (#3781)
Signed-off-by: Mike Iovine <[email protected]>
1 parent d0d19e8 commit bc5fe78

File tree

2 files changed: 6 additions and 6 deletions (+6 −6 lines changed).

examples/pytorch/quickstart_advanced.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,9 +66,9 @@ def add_llm_args(parser):
66 66
67 67   # KV cache
68 68   parser.add_argument('--kv_cache_dtype', type=str, default='auto')
69    - parser.add_argument('--kv_cache_enable_block_reuse',
70    - default=True,
71    - action='store_false')
   69 + parser.add_argument('--disable_kv_cache_reuse',
   70 + default=False,
   71 + action='store_true')
72 72   parser.add_argument("--kv_cache_fraction", type=float, default=None)
73 73
74 74   # Runtime
@@ -119,7 +119,7 @@ def setup_llm(args):
119 119   enable_trtllm_decoder=args.enable_trtllm_decoder)
120 120
121 121   kv_cache_config = KvCacheConfig(
122     - enable_block_reuse=args.kv_cache_enable_block_reuse,
    122 + enable_block_reuse=not args.disable_kv_cache_reuse,
123 123   free_gpu_memory_fraction=args.kv_cache_fraction,
124 124   )
125 125
tests/integration/defs/test_e2e.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1616,7 +1616,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
1616 1616   f"{llm_models_root()}/{model_path}",
1617 1617   "--eagle_model_dir",
1618 1618   f"{llm_models_root()}/{eagle_model_path}",
1619      - "--kv_cache_enable_block_reuse",
     1619 + "--disable_kv_cache_reuse",
1620 1620   ])
1621 1621
1622 1622
@@ -1643,7 +1643,7 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
1643 1643   "--kv_cache_fraction=0.95",
1644 1644   "--max_batch_size=1",
1645 1645   "--max_seq_len=3000",
1646      - "--kv_cache_enable_block_reuse",
     1646 + "--disable_kv_cache_reuse",
1647 1647   ])
1648 1648
1649 1649
0 commit comments

Comments
 (0)