3 changes: 3 additions & 0 deletions jenkins/L0_Test.groovy
@@ -1276,6 +1276,8 @@ def getMakoArgsFromStageName(stageName, parseSysinfo=false) {
makoArgs += ["auto_trigger=deepseek"]
} else if (stageName.contains("-GptOss-")) {
makoArgs += ["auto_trigger=gpt_oss"]
} else if (stageName.contains("-Qwen3Next-")) {
makoArgs += ["auto_trigger=qwen3_next"]
} else {
makoArgs += ["auto_trigger=others"]
}
@@ -2048,6 +2050,7 @@ def launchTestJobs(pipeline, testFilter)
"DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-4_GPUs-PyTorch-GptOss-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Qwen3Next-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
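For context, the Groovy change above extends a simple substring dispatch: the first marker found in the stage name selects the `auto_trigger` mako argument, with `others` as the fallback. A minimal Python sketch of the same logic (the marker/trigger pairs are taken from the Groovy branch above; the function name and table are illustrative):

```python
# Sketch of the substring-based auto_trigger dispatch in L0_Test.groovy.
# Ordering matters: the first matching marker wins, mirroring the if/else chain.
STAGE_TRIGGERS = [
    ("-DeepSeek-", "deepseek"),
    ("-GptOss-", "gpt_oss"),
    ("-Qwen3Next-", "qwen3_next"),
]

def auto_trigger_for(stage_name: str) -> str:
    for marker, trigger in STAGE_TRIGGERS:
        if marker in stage_name:
            return trigger
    return "others"  # fallback branch in the Groovy code

assert auto_trigger_for("DGX_H100-4_GPUs-PyTorch-Qwen3Next-1") == "qwen3_next"
```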
2 changes: 2 additions & 0 deletions tests/integration/defs/accuracy/references/gsm8k.yaml
@@ -123,6 +123,8 @@ Qwen3/Qwen3-235B-A22B:
quant_algo: NVFP4
kv_cache_quant_algo: FP8
accuracy: 85.78
Qwen3/Qwen3-Next-80B-A3B-Thinking:
- accuracy: 81.577
moonshotai/Kimi-K2-Instruct:
- quant_algo: FP8_BLOCK_SCALES
accuracy: 94.84
2 changes: 2 additions & 0 deletions tests/integration/defs/accuracy/references/mmlu.yaml
@@ -229,6 +229,8 @@ Qwen3/Qwen3-235B-A22B:
quant_algo: NVFP4
kv_cache_quant_algo: FP8
accuracy: 86
Qwen3/Qwen3-Next-80B-A3B-Thinking:
- accuracy: 86
moonshotai/Kimi-K2-Instruct:
- quant_algo: FP8_BLOCK_SCALES
accuracy: 87.65
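Both reference files (gsm8k.yaml and mmlu.yaml) share the same shape: a model key maps to a list of entries, each optionally qualified by quantization settings and carrying an expected accuracy. A hedged sketch of how such an entry might be resolved (the file layout matches the YAML above; the lookup helper itself is hypothetical, not the harness's actual code):

```python
import yaml  # PyYAML

def reference_accuracy(path: str, model: str, quant_algo: str | None = None) -> float:
    """Hypothetical lookup: return the expected accuracy for a model,
    matching on quant_algo when the reference lists several variants."""
    with open(path) as f:
        refs = yaml.safe_load(f)
    for entry in refs[model]:
        if entry.get("quant_algo") == quant_algo:
            return entry["accuracy"]
    raise KeyError(f"no reference for {model} with quant_algo={quant_algo}")

# The new Qwen3-Next entries carry no quant_algo, so they match the default:
# reference_accuracy("gsm8k.yaml", "Qwen3/Qwen3-Next-80B-A3B-Thinking")  # -> 81.577
```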
31 changes: 31 additions & 0 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -3559,6 +3559,37 @@ def test_auto_dtype_tp4(self):
task.evaluate(llm)


@pytest.mark.skip_less_device_memory(80000)
class TestQwen3NextThinking(LlmapiAccuracyTestHarness):
MODEL_NAME = "Qwen3/Qwen3-Next-80B-A3B-Thinking"
MODEL_PATH = f"{llm_models_root()}/{MODEL_NAME}"

@skip_pre_hopper
@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4)],
ids=["tp4ep4"])
def test_auto_dtype(self, tp_size, pp_size, ep_size):
if get_device_count() != tp_size * pp_size:
pytest.skip("Device count mismatch with world size")

kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6,
enable_block_reuse=False)
cuda_graph_config = CudaGraphConfig(enable_padding=True,
max_batch_size=720)

with LLM(self.MODEL_PATH,
max_num_tokens=4096,
tensor_parallel_size=tp_size,
pipeline_parallel_size=pp_size,
moe_expert_parallel_size=ep_size,
kv_cache_config=kv_cache_config,
cuda_graph_config=cuda_graph_config) as llm:
task = MMLU(self.MODEL_NAME)
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)


class TestNano_V2_VLM(LlmapiAccuracyTestHarness):
MODEL_NAME = "nvidia/Nano-v2-VLM"
MODEL_PATH = f"{llm_models_root()}/Nano-v2-VLM"
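Outside the accuracy harness, the same configuration can be exercised directly through the LLM API. A minimal sketch, assuming the imports live under tensorrt_llm / tensorrt_llm.llmapi as in recent TensorRT-LLM releases; the model path is a placeholder for the test's MODEL_PATH:

```python
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig

# Same knobs as TestQwen3NextThinking.test_auto_dtype: block reuse disabled,
# padded CUDA graphs, TP=4 with EP=4 for the MoE layers.
llm = LLM(
    "/path/to/Qwen3-Next-80B-A3B-Thinking",  # placeholder model path
    max_num_tokens=4096,
    tensor_parallel_size=4,
    pipeline_parallel_size=1,
    moe_expert_parallel_size=4,
    kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.6,
                                  enable_block_reuse=False),
    cuda_graph_config=CudaGraphConfig(enable_padding=True, max_batch_size=720),
)
outputs = llm.generate(["Solve: 12 * 7 = ?"],
                       SamplingParams(max_tokens=64))
print(outputs[0].outputs[0].text)
```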
15 changes: 15 additions & 0 deletions tests/integration/test_lists/test-db/l0_dgx_h100.yml
@@ -164,6 +164,21 @@ l0_dgx_h100:
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto]
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto]
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4-auto]
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
auto_trigger: qwen3_next
tests:
- accuracy/test_llm_api_pytorch.py::TestQwen3NextThinking::test_auto_dtype[tp4ep4]
- condition:
ranges:
system_gpu_count:
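The new condition block gates the test on exactly four GPUs (gte: 4 and lte: 4) matching the h100 wildcard, an Ubuntu host, and the qwen3_next auto trigger set by the Jenkins change above. A rough sketch of how such a predicate could be evaluated (the matcher below is illustrative only; the real test-db resolver is not shown in this diff):

```python
from fnmatch import fnmatch

# Illustrative matcher for test-db condition blocks like the one above.
condition = {
    "ranges": {"system_gpu_count": {"gte": 4, "lte": 4}},
    "wildcards": {"gpu": ["*h100*"], "linux_distribution_name": "ubuntu*"},
    "terms": {"stage": "pre_merge", "backend": "pytorch",
              "auto_trigger": "qwen3_next"},
}

def matches(cond: dict, env: dict) -> bool:
    for key, rng in cond.get("ranges", {}).items():
        value = env[key]
        if value < rng.get("gte", value) or value > rng.get("lte", value):
            return False
    for key, patterns in cond.get("wildcards", {}).items():
        patterns = patterns if isinstance(patterns, list) else [patterns]
        if not any(fnmatch(env[key], p) for p in patterns):
            return False
    return all(env[k] == v for k, v in cond.get("terms", {}).items())

env = {"system_gpu_count": 4, "gpu": "nvidia-h100-sxm",
       "linux_distribution_name": "ubuntu22.04", "stage": "pre_merge",
       "backend": "pytorch", "auto_trigger": "qwen3_next"}
assert matches(condition, env)
```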