diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_cuda_causal_conv_cached_op.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_cuda_causal_conv_cached_op.py
index b2745e51bfd..05ac3c70d22 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_cuda_causal_conv_cached_op.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_cuda_causal_conv_cached_op.py
@@ -97,6 +97,7 @@ def test_generate_only_with_slot_mapping_cuda(conv_env):
     )
 
 
+@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5548861")
 def test_context_flattened_and_state_writeback_cuda(conv_env):
     device = conv_env["device"]
     dtype = conv_env["dtype"]
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_triton_mamba_cached_op.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_triton_mamba_cached_op.py
index 97a23572206..6cce60f1684 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_triton_mamba_cached_op.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_triton_mamba_cached_op.py
@@ -28,6 +28,7 @@ def mamba_env():
     return {"device": device, "dtype": dtype, "atol": atol, "rtol": rtol}
 
 
+@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5548861")
 def test_triton_generate_only_with_slot_mapping(mamba_env):
     device = mamba_env["device"]
     dtype = mamba_env["dtype"]