@@ -433,6 +433,7 @@ def test_adjust_window_sizes_for_vswa(self):
433433 200 : [4 , 5 , 6 ],
434434 7000 : [7 , 8 ],
435435 }
436+ max_attention_window_vec = [100 ] * 4 + [200 ] * 3 + [7000 ] * 2
436437
437438 model_config = self .MockModelConfig ()
438439 model_config .num_attention_heads = 2
@@ -460,6 +461,7 @@ def test_adjust_window_sizes_for_vswa(self):
460461 100 : [0 , 1 , 2 , 3 ],
461462 130 : [4 , 5 , 6 , 7 , 8 ],
462463 },
464+ [100 ] * 4 + [130 ] * 5 ,
463465 None ,
464466 "limited_memory_clamped_windows" ),
465467 (
@@ -471,6 +473,7 @@ def test_adjust_window_sizes_for_vswa(self):
471473 200 : [4 , 5 , 6 ],
472474 1017 : [7 , 8 ],
473475 },
476+ [100 ] * 4 + [200 ] * 3 + [1017 ] * 2 ,
474477 None ,
475478 "less_limited_memory_clamped_windows" ),
476479 (
@@ -482,6 +485,7 @@ def test_adjust_window_sizes_for_vswa(self):
482485 200 : [4 , 5 , 6 ],
483486 7000 : [7 , 8 ],
484487 },
488+ [100 ] * 4 + [200 ] * 3 + [7000 ] * 2 ,
485489 None ,
486490 "sufficient_memory_no_clamping" ),
487491 (
@@ -490,6 +494,7 @@ def test_adjust_window_sizes_for_vswa(self):
490494 {
491495 51 : [0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ],
492496 },
497+ [51 ] * 9 ,
493498 None ,
494499 "very_limited_memory_all_clamped" ),
495500 (
@@ -501,15 +506,17 @@ def test_adjust_window_sizes_for_vswa(self):
501506 100 : [0 , 1 , 2 , 3 ],
502507 134 : [4 , 5 , 6 , 7 , 8 ],
503508 },
509+ [100 ] * 4 + [134 ] * 5 ,
504510 134 ,
505511 "less_limited_memory_but_clamped_by_max_tokens" ),
506512 ]
507513
508- for memory_bytes , expected_window_sizes , max_tokens , description in test_cases :
514+ for memory_bytes , expected_window_sizes , expected_max_attention_window_vec , max_tokens , description in test_cases :
509515 with self .subTest (case = description , memory_bytes = memory_bytes ):
510516 kv_cache_config = tllm .KvCacheConfig (max_tokens = max_tokens )
511- adjusted = KVCacheManager .adjust_window_sizes_for_vswa (
517+ adjusted , adjusted_max_attention_window_vec = KVCacheManager .adjust_window_sizes_for_vswa (
512518 window_size_to_layers = window_size_to_layers ,
519+ max_attention_window_vec = max_attention_window_vec ,
513520 model_config = model_config ,
514521 kv_cache_config = kv_cache_config ,
515522 pool_memory_bytes = memory_bytes ,
@@ -524,6 +531,13 @@ def test_adjust_window_sizes_for_vswa(self):
524531 f"Memory bytes: { memory_bytes } \n "
525532 f"Actual: { adjusted } \n "
526533 f"Expected: { expected_window_sizes } " )
534+ self .assertEqual (
535+ adjusted_max_attention_window_vec ,
536+ expected_max_attention_window_vec ,
537+ f"Test case '{ description } ' failed.\n "
538+ f"Memory bytes: { memory_bytes } \n "
539+ f"Actual: { adjusted_max_attention_window_vec } \n "
540+ f"Expected: { expected_max_attention_window_vec } " )
527541
528542
529543if __name__ == "__main__" :
0 commit comments