Commit 54ad131

qixiang-99 committed
Skip chunk prefill test case due to kernel incompatibility.
Signed-off-by: qixiang-99 <[email protected]>
1 parent: d7ba3f2

File tree (1 file changed: +20 −0 lines):

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 20 additions & 0 deletions
@@ -479,6 +479,26 @@ def test_auto_dtype_vswa(self):
             task = GSM8K(self.MODEL_NAME)
             task.evaluate(llm)
 
+    @pytest.mark.skip(
+        reason=
+        "remove this skip after the kernel support mentioned in this nvbug is fixed: https://nvbugspro.nvidia.com/bug/5338620"
+    )
+    def test_auto_dtype_chunked_prefill(self):
+        # NOTE: Test with VSWA kv cache config.
+        self.kv_cache_config.max_attention_window = [
+            512, 512, 512, 512, 512, 32768
+        ]  # Gemma3 1B attention window size pattern
+        # chunked prefill case or more features
+        extra_llm_config = dict(
+            enable_chunked_prefill=True,
+            max_num_tokens=1024,
+        )
+        with LLM(self.MODEL_PATH,
+                 kv_cache_config=self.kv_cache_config,
+                 **extra_llm_config) as llm:
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
 
 class TestMixtral8x7B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
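For context, the skipped test exercises chunked prefill together with a variable sliding-window attention (VSWA) KV cache. Below is a minimal standalone sketch of the same configuration driven through the LLM API, not taken from the commit: the import paths, the model path, and the generate/SamplingParams calls are assumptions about the public llmapi, while the attention-window pattern and the enable_chunked_prefill / max_num_tokens knobs mirror the diff above.

    # Hypothetical standalone sketch; imports and model path are assumptions,
    # the configuration values come from the test added in this commit.
    from tensorrt_llm import LLM, SamplingParams
    from tensorrt_llm.llmapi import KvCacheConfig

    # VSWA pattern from the test: five 512-token sliding-window layers
    # per one 32768-token global layer (Gemma3 1B layer pattern).
    kv_cache_config = KvCacheConfig(
        max_attention_window=[512, 512, 512, 512, 512, 32768])

    with LLM("google/gemma-3-1b-it",          # assumed model id
             kv_cache_config=kv_cache_config,
             enable_chunked_prefill=True,     # split long prompts across iterations
             max_num_tokens=1024) as llm:     # per-iteration token budget
        outputs = llm.generate(["The capital of France is"],
                               SamplingParams(max_tokens=32))
        print(outputs[0].outputs[0].text)

Once the kernel support tracked in the referenced nvbug lands, dropping the @pytest.mark.skip decorator re-enables the test; it can then be run selectively with, for example: pytest tests/integration/defs/accuracy/test_llm_api_pytorch.py -k test_auto_dtype_chunked_prefill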