Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 9 additions & 15 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2916,11 +2916,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):

MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"

@pytest.mark.parametrize(
"moe_backend",
["CUTLASS",
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@skip_pre_blackwell # Pre-Blackwell only supports w4a16, which is covered in test_w4a16. See also get_mxfp4_quant_algo.
Copy link
Collaborator

@dongfengy dongfengy Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only the TRTLLM backend needs to be skipped pre-Hopper, and we already do that.
I think this is the proper fix: #7332. There is another fix for the BF16 reference: #7323. With these two, the test should run fine.

@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM", "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
(True, True),
])
Expand Down Expand Up @@ -2950,12 +2948,10 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
task.evaluate(llm,
extra_evaluator_kwargs=self.extra_evaluator_kwargs)

@skip_pre_blackwell # Pre-Blackwell only supports w4a16, which is covered in test_w4a16. See also get_mxfp4_quant_algo.
@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize(
"moe_backend",
["CUTLASS",
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM", "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@pytest.mark.parametrize(
"tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler", [
(4, 1, 1, False, True, True),
Expand Down Expand Up @@ -3024,12 +3020,10 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
task.evaluate(llm,
extra_evaluator_kwargs=self.extra_evaluator_kwargs)

@skip_pre_blackwell # Pre-Blackwell only supports w4a16, which is covered in test_w4a16. See also get_mxfp4_quant_algo.
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize(
"moe_backend",
["CUTLASS",
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM", "TRITON"],
ids=["cutlass", "trtllm", "triton"])
@pytest.mark.parametrize(
"tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler", [
(2, 1, 1, False, True, True),
Expand Down
Loading