
Commit 14d95b2

qixiang-99 authored and hlu1 committed
Skip chunk prefill test case due to kernel incompatibility.
Signed-off-by: qixiang-99 <[email protected]>
1 parent 93b81bb commit 14d95b2

File tree: 1 file changed (+20 −0 lines)


tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 20 additions & 0 deletions
@@ -536,6 +536,26 @@ def test_auto_dtype_vswa(self):
             task = GSM8K(self.MODEL_NAME)
             task.evaluate(llm)
 
+    @pytest.mark.skip(
+        reason=
+        "remove this skip after the kernel support mentioned in this nvbug is fixed: https://nvbugspro.nvidia.com/bug/5338620"
+    )
+    def test_auto_dtype_chunked_prefill(self):
+        # NOTE: Test with VSWA kv cache config.
+        self.kv_cache_config.max_attention_window = [
+            512, 512, 512, 512, 512, 32768
+        ]  # Gemma3 1B attention window size pattern
+        # chunked prefill case or more features
+        extra_llm_config = dict(
+            enable_chunked_prefill=True,
+            max_num_tokens=1024,
+        )
+        with LLM(self.MODEL_PATH,
+                 kv_cache_config=self.kv_cache_config,
+                 **extra_llm_config) as llm:
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
 
 class TestMixtral8x7B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
