diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py index c1a9ed541be..d6fb84e4cb2 100644 --- a/tests/integration/defs/perf/disagg/compare_backends.py +++ b/tests/integration/defs/perf/disagg/compare_backends.py @@ -2,6 +2,7 @@ """Compare performance test results between different backends (UCX vs NIXL).""" import argparse +import os import re import sys @@ -44,6 +45,10 @@ def compare_backends(csv_path, threshold=5.0, default_backend="NIXL"): Returns: DataFrame: Comparison results """ + if not os.path.exists(csv_path): + print(f"CSV file not found: {csv_path}") + sys.exit(0) + # Read CSV file df = pd.read_csv(csv_path) diff --git a/tests/integration/defs/perf/disagg/envs/ENV.md b/tests/integration/defs/perf/disagg/envs/ENV.md index 997fc151656..5d1f7320c94 100644 --- a/tests/integration/defs/perf/disagg/envs/ENV.md +++ b/tests/integration/defs/perf/disagg/envs/ENV.md @@ -15,7 +15,8 @@ export TRTLLM_WHEEL_PATH="" export GPU_TYPE="" export SLURM_PARTITION="" export SLURM_ACCOUNT="" -export MODEL_DIR="" +export MODEL_DIR="" +export DATASET_DIR="" export OUTPUT_PATH="" export PATH="" export XDG_CACHE_HOME="" @@ -70,10 +71,15 @@ SLURM account name for job billing and resource allocation. - **Example**: `your_project_account` ### `MODEL_DIR` -Base directory containing models and datasets. This path will be used to locate model checkpoints and dataset files. +Base directory containing models. This path will be used to locate model checkpoints. - **Format**: Absolute path - **Example**: `/shared/models/common` +### `DATASET_DIR` +Base directory containing dataset files. This path will be used to locate dataset files. +- **Format**: Absolute path +- **Example**: `/shared/datasets/common` + ### `OUTPUT_PATH` Directory where test results, HTML reports, and CSV files will be saved. - **Format**: Absolute path diff --git a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py index 7034254ee0d..9ab77714267 100644 --- a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py +++ b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py @@ -56,10 +56,8 @@ def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) check=True, **kwargs, ) - # Log stderr if it exists if result.stderr: stderr_output = result.stderr.decode() logger.error(f"Command stderr: {stderr_output}") - return result.stdout.decode() diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml index 33ee191ffd8..90a198897b6 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml index 12ac8edad06..120fc40b3c2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index ab5bd6f7196..6a4f5f5ddfe 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 7d8cb97621d..e8f1b31a411 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml index 3f9a7d6a2d1..2f9d1ad7c8a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml index f2fd2bc21db..e60204a5624 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml index 5d9d739d58f..a307a87f173 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml index f97137297b6..d44c4d51e06 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml index 6b9078ac5a4..05c6794dd63 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml index 468354c0734..10aa98c4b30 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index a970ee6de44..64dd806fa6d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 22dc90a06b3..b0b73132261 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml index a54b0dacd53..796fdbd8747 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml index ab081e78cfb..4a45880f147 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml index f4a5d3bc3a9..bc46d9fea34 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml index 93883653834..c397316b355 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 1eaf479dcca..5de651526ec 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json accuracy: datasets: - dataset_name: gsm8k diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..4cbcd13dd5d --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -0,0 +1,118 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 1k1k + config_index: 6 + dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json + accuracy: + datasets: + - dataset_name: gsm8k + expected_value: 0.9454 + threshold_type: hypothesis_test + filter_type: flexible-extract +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + extra_args: "--gres=gpu:4" + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '16384' + input_length: 1024 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 3 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: true + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 16 + moe_expert_parallel_size: 16 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 1024 + max_num_tokens: 1024 + max_seq_len: 5120 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 1024 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.8 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 384 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 8 + max_num_tokens: 8448 + max_seq_len: 5120 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 60a221d9968..927fdae9885 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 8 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 8724f191f5e..8c138fc7f03 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 11 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 738c7206502..a4af6a85968 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 10 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index af30a466bea..cf7aaf0f6c1 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 13 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index c44b3f6bba8..a56926befd4 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 9 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index b7a79d74344..54854c0bf5c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 12 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 73a27246c04..99121fca3d2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 1 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index e95e71ca155..6dcc5d71d38 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 3 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 6055421a278..d934ef4c0af 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index 6b47c0fc36a..0a37ad83dbf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 2 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index 1e71708f577..9c045491ccf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -2,14 +2,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 7 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index 06900691bcc..fc4e31ed35c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 14 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 13572a60499..83e3521db07 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 5 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index 30e61523023..baaa80158b7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 7 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 55391a698c4..7e722b4424f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 4 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index 62301215e96..2205179880f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -1,14 +1,14 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 6 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..78081a23acc --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -0,0 +1,112 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 1k1k + config_index: 6 + dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + extra_args: "--gres=gpu:4" + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '16384' + input_length: 1024 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 3 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: false + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 16 + moe_expert_parallel_size: 16 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 1024 + max_num_tokens: 1024 + max_seq_len: 2068 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 1024 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.8 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 384 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 8 + max_num_tokens: 8448 + max_seq_len: 1044 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..ce6a85757b3 --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -0,0 +1,112 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 8k1k + config_index: 6 + dataset_file: disagg_datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + extra_args: "--gres=gpu:4" + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '8192' + input_length: 8192 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 8 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: false + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 32 + moe_expert_parallel_size: 32 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 256 + max_num_tokens: 256 + max_seq_len: 9256 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 256 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.6 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 416 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 1 + max_num_tokens: 8448 + max_seq_len: 8232 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true diff --git a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt new file mode 100644 index 00000000000..4e0bf609f2f --- /dev/null +++ b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt @@ -0,0 +1,2 @@ +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX] +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL] diff --git a/tests/integration/defs/perf/disagg/testlist/wideep.txt b/tests/integration/defs/perf/disagg/testlist/wideep.txt index 55e7bd47219..28684e096f0 100644 --- a/tests/integration/defs/perf/disagg/testlist/wideep.txt +++ b/tests/integration/defs/perf/disagg/testlist/wideep.txt @@ -8,6 +8,8 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL] test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL] test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT] +test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] +test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL] @@ -15,3 +17,4 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_ # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX] test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL] +test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index 9fb72fbacb9..c050fdd4682 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -82,7 +82,11 @@ def get_trtllm_wheel_path() -> str: @staticmethod def get_model_dir() -> str: - return os.getenv("MODEL_DIR", "") + return os.getenv("MODEL_DIR", "") + + @staticmethod + def get_dataset_dir() -> str: + return os.getenv("DATASET_DIR", "") @staticmethod def get_output_path() -> str: @@ -99,10 +103,11 @@ def get_install_mode() -> str: return os.getenv("INSTALL_MODE", "none") @staticmethod - def get_container_mount() -> str: + def get_container_mount(model_name: str = "") -> str: work_dir = EnvManager.get_work_dir() script_dir = EnvManager.get_script_dir() model_dir = EnvManager.get_model_dir() + dataset_dir = EnvManager.get_dataset_dir() output_path = EnvManager.get_output_path() repo_dir = EnvManager.get_repo_dir() trtllm_wheel_path = EnvManager.get_trtllm_wheel_path() @@ -114,10 +119,16 @@ def get_container_mount() -> str: f"{output_path}:{output_path}", ] + # Kimi-K2 needs 640G of shared memory, otherwise will cause host memory OOM. + if model_name.find("kimi-k2") != -1: + mounts.append("tmpfs:/dev/shm:size=640G") + + if dataset_dir and not dataset_dir.startswith("<"): + mounts.append(f"{dataset_dir}:{dataset_dir}") # Add repo_dir if available - if repo_dir: + if repo_dir and not repo_dir.startswith("<"): mounts.append(f"{repo_dir}:{repo_dir}") - if trtllm_wheel_path: + if trtllm_wheel_path and not trtllm_wheel_path.startswith("<"): trtllm_wheel_dir = os.path.dirname(trtllm_wheel_path) mounts.append(f"{trtllm_wheel_dir}:{trtllm_wheel_dir}") return ",".join(mounts) diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index f7eeafd0cde..7ee64d410d5 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -88,9 +88,9 @@ def get_all_dataset_names(self) -> List[str]: log_file="bench.log", extractor_pattern=r""" ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Benchmark\ with\ concurrency\ (\d+)\ done """, metric_names=["SERVER_MEDIAN_TTFT", "SERVER_MEDIAN_E2EL"], @@ -99,21 +99,29 @@ def get_all_dataset_names(self) -> List[str]: log_file="bench.log", extractor_pattern=r""" ^.*?Mean\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ ITL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ ITL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ ITL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Benchmark\ with\ concurrency\ (\d+)\ done """, metric_names=[ @@ -308,7 +316,7 @@ def _load_config_file(self, yaml_path: Path, test_type: str, test_category: str) supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"]) # Override config with environment variables (in memory only, do not write back) - config_data = self._apply_env_overrides(config_data) + config_data = self._apply_env_overrides(config_data, model_name) # Generate benchmark_type from sequence configuration benchmark_type = self._generate_benchmark_type(config_data) @@ -440,7 +448,7 @@ def _get_metrics_config( logger.debug(f"Using default metrics config for {test_category}") return default_config - def _apply_env_overrides(self, config_data: dict) -> dict: + def _apply_env_overrides(self, config_data: dict, model_name: str) -> dict: """Apply environment variable overrides to configuration. Intelligently replaces empty or None values based on field path. @@ -461,7 +469,7 @@ def _apply_env_overrides(self, config_data: dict) -> dict: ("slurm", "partition"): lambda: EnvManager.get_slurm_partition(), ("slurm", "account"): lambda: EnvManager.get_slurm_account(), ("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(), - ("environment", "container_mount"): lambda: EnvManager.get_container_mount(), + ("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name), ("environment", "container_image"): lambda: EnvManager.get_container_image(), ("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(), ("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(), @@ -500,7 +508,7 @@ def _get_dataset_file(self, config: dict) -> str: """ metadata = config.get("metadata", {}) dataset_file = metadata.get("dataset_file", "") - return os.path.join(EnvManager.get_model_dir(), dataset_file) + return os.path.join(EnvManager.get_dataset_dir(), dataset_file) def _get_script_file(self, config: dict) -> str: """Get script file by combining scripts directory with script file name. diff --git a/tests/integration/defs/perf/disagg/utils/config_validator.py b/tests/integration/defs/perf/disagg/utils/config_validator.py index 508e1b53acf..39b65a4e1b6 100644 --- a/tests/integration/defs/perf/disagg/utils/config_validator.py +++ b/tests/integration/defs/perf/disagg/utils/config_validator.py @@ -83,5 +83,5 @@ def _validate_ctx_and_gen_max_seq_length(extracted_config: dict) -> None: osl = extracted_config["osl"] ctx_max_seq_len = extracted_config["ctx_max_seq_len"] gen_max_seq_len = extracted_config["gen_max_seq_len"] - assert ctx_max_seq_len > (isl + osl), "config error: ctx_max_seq_len <= (isl + osl)" + assert ctx_max_seq_len > isl, "config error: ctx_max_seq_len > isl" assert gen_max_seq_len > (isl + osl), "config error: gen_max_seq_len <= (isl + osl)"