diff --git a/tests/test_metal_kernel_paged.py b/tests/test_metal_kernel_paged.py index 438637c4..4f39b491 100644 --- a/tests/test_metal_kernel_paged.py +++ b/tests/test_metal_kernel_paged.py @@ -189,11 +189,6 @@ def test_greedy_output_matches(self, qwen3_model): ) @pytest.mark.slow - @pytest.mark.xfail( - reason="B=2 batched GEMM produces different floats than B=1, " - "causing token divergence after ~5 decode steps (not a kernel bug). " - "See https://github.com/vllm-project/vllm-metal/issues/119" - ) def test_batched_decode_matches(self, qwen3_model): """Batched Metal kernel paged decode must match per-request sequential.""" model, tokenizer = qwen3_model