From ec7291ed22778f21ff054bc87e9e320f50796072 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Mon, 8 Dec 2025 11:10:16 +0800 Subject: [PATCH 01/20] fix conflict Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- .../integration/defs/perf/disagg/execution/subprocess_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py index 7034254ee0d..9ab77714267 100644 --- a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py +++ b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py @@ -56,10 +56,8 @@ def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) check=True, **kwargs, ) - # Log stderr if it exists if result.stderr: stderr_output = result.stderr.decode() logger.error(f"Command stderr: {stderr_output}") - return result.stdout.decode() From a2b57d77e0705d1ff7862d03ee2d2cc63458ec27 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:56:48 +0800 Subject: [PATCH 02/20] add debug.txt for debugging Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/testlist/debug.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/integration/defs/perf/disagg/testlist/debug.txt diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt new file mode 100644 index 00000000000..6ed7b607ce2 --- /dev/null +++ b/tests/integration/defs/perf/disagg/testlist/debug.txt @@ -0,0 +1 @@ +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL] \ No newline at end of file From fe09f9858aa100cb2b838b21bc6c37665fe725ff Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Wed, 3 Dec 2025 17:30:17 +0800 Subject: [PATCH 03/20] debugging Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/testlist/debug.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt index 6ed7b607ce2..5ae7ca3045e 100644 --- a/tests/integration/defs/perf/disagg/testlist/debug.txt +++ b/tests/integration/defs/perf/disagg/testlist/debug.txt @@ -1 +1 @@ -test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL] \ No newline at end of file +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT] \ No newline at end of file From f2ea0597bafe2545313bb620615772e4ba16791d Mon Sep 17 00:00:00 2001 From: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> Date: Tue, 2 Dec 2025 03:40:40 -0800 Subject: [PATCH 04/20] Add Kimi K2 Thinking NVFP4 QA yaml files Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> --- ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 111 ++++++++++++++++++ ...gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 111 ++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..7bd30451d31 --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -0,0 +1,111 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 1k1k + config_index: 6 + dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '16384' + input_length: 1024 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 3 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: false + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 16 + moe_expert_parallel_size: 16 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 1024 + max_num_tokens: 1024 + max_seq_len: 2068 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 1024 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.8 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 384 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 8 + max_num_tokens: 8448 + max_seq_len: 1044 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..095d1a874dc --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -0,0 +1,111 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 8k1k + config_index: 6 + dataset_file: datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '8192' + input_length: 8192 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 8 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: false + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 32 + moe_expert_parallel_size: 32 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 256 + max_num_tokens: 256 + max_seq_len: 9256 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 256 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.6 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 416 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 1 + max_num_tokens: 8448 + max_seq_len: 8232 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true From 95f37a51dfb732f940f4d5ca7fac5e97a14366e2 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:30:18 +0800 Subject: [PATCH 05/20] add kimi k2 test for wideep both perf and accuracy Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 118 ++++++++++++++++++ ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 1 + ...gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 1 + .../defs/perf/disagg/testlist/wideep.txt | 3 + 4 files changed, 123 insertions(+) create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml new file mode 100644 index 00000000000..ba1c599055b --- /dev/null +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -0,0 +1,118 @@ +metadata: + model_name: kimi-k2-thinking-fp4 + precision: fp4 + model_dir_name: Kimi-K2-Thinking-NVFP4 + supported_gpus: + - GB200 + - GB300 + script_file: disaggr_torch.slurm + benchmark_type: 1k1k + config_index: 6 + dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json + accuracy: + datasets: + - dataset_name: gsm8k + expected_value: 0.9454 + threshold_type: hypothesis_test + filter_type: flexible-extract +slurm: + script_file: disaggr_torch.slurm + partition: + account: + job_time: 00:45:00 + job_name: unified-benchmark + extra_args: "--gres=gpu:4" + numa_bind: true +benchmark: + mode: gen_only + use_nv_sa_benchmark: false + multi_round: 8 + benchmark_ratio: 1.0 + streaming: true + concurrency_list: '16384' + input_length: 1024 + output_length: 1024 + dataset_file: +hardware: + gpus_per_node: 4 + num_ctx_servers: 3 + num_gen_servers: 1 +environment: + container_mount: + container_image: + model_path: + trtllm_repo: '' + build_wheel: false + work_dir: +profiling: + nsys_on: false +accuracy: + enable_accuracy_test: true + model: local-completions + tasks: gsm8k + model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 +worker_config: + gen: + enable_layerwise_nvtx_marker: true + tensor_parallel_size: 16 + moe_expert_parallel_size: 16 + enable_attention_dp: true + enable_lm_head_tp_in_adp: false + pipeline_parallel_size: 1 + max_batch_size: 1024 + max_num_tokens: 1024 + max_seq_len: 2068 + cuda_graph_config: + enable_padding: true + batch_sizes: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 2048 + - 1024 + print_iter_log: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.8 + dtype: fp8 + moe_config: + backend: WIDEEP + use_low_precision_moe_combine: true + load_balancer: + num_slots: 384 + layer_updates_per_iter: 1 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + stream_interval: 100 + num_postprocess_workers: 4 + trust_remote_code: true + ctx: + enable_layerwise_nvtx_marker: true + max_batch_size: 8 + max_num_tokens: 8448 + max_seq_len: 1044 + tensor_parallel_size: 4 + moe_expert_parallel_size: 4 + enable_attention_dp: true + pipeline_parallel_size: 1 + print_iter_log: true + cuda_graph_config: null + disable_overlap_scheduler: true + kv_cache_config: + enable_block_reuse: false + free_gpu_memory_fraction: 0.75 + dtype: fp8 + cache_transceiver_config: + max_tokens_in_buffer: 8448 + backend: UCX + trust_remote_code: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 7bd30451d31..1f33b8d2450 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -15,6 +15,7 @@ slurm: account: job_time: 00:45:00 job_name: unified-benchmark + extra_args: "--gres=gpu:4" numa_bind: true benchmark: mode: gen_only diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml index 095d1a874dc..bb7b0efc016 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -15,6 +15,7 @@ slurm: account: job_time: 00:45:00 job_name: unified-benchmark + extra_args: "--gres=gpu:4" numa_bind: true benchmark: mode: gen_only diff --git a/tests/integration/defs/perf/disagg/testlist/wideep.txt b/tests/integration/defs/perf/disagg/testlist/wideep.txt index 55e7bd47219..ea1e0be57d4 100644 --- a/tests/integration/defs/perf/disagg/testlist/wideep.txt +++ b/tests/integration/defs/perf/disagg/testlist/wideep.txt @@ -8,6 +8,8 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL] test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL] test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT] +test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] +test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL] @@ -15,3 +17,4 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_ # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX] test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL] +test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] \ No newline at end of file From d5538ed5d0c9f9a45c6dc7dec536fbe8ac1a37a3 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:34:23 +0800 Subject: [PATCH 06/20] remove debug txt file Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/testlist/debug.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tests/integration/defs/perf/disagg/testlist/debug.txt diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt deleted file mode 100644 index 5ae7ca3045e..00000000000 --- a/tests/integration/defs/perf/disagg/testlist/debug.txt +++ /dev/null @@ -1 +0,0 @@ -test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT] \ No newline at end of file From 72e5ba7d92d55a8340c344f9bedb76f6366d99f3 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 14:56:49 +0800 Subject: [PATCH 07/20] fix ctx max seq length Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index ba1c599055b..302ef4a0562 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -100,7 +100,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 1044 + max_seq_len: 2068 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 1f33b8d2450..1b9b1bc6c8d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -94,7 +94,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 1044 + max_seq_len: 2068 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml index bb7b0efc016..63f78503d97 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -94,7 +94,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 1 max_num_tokens: 8448 - max_seq_len: 8232 + max_seq_len: 9256 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true From 538b9eae8ee159e42193150bad94c672384fbde3 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:00:56 +0800 Subject: [PATCH 08/20] fx revert back to the origin ctx seq length Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 302ef4a0562..ba1c599055b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -100,7 +100,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 2068 + max_seq_len: 1044 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 1b9b1bc6c8d..1f33b8d2450 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -94,7 +94,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 2068 + max_seq_len: 1044 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml index 63f78503d97..bb7b0efc016 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -94,7 +94,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 1 max_num_tokens: 8448 - max_seq_len: 9256 + max_seq_len: 8232 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true From 6967e574ea0a273eaf943cb47d57e0d8c44b80af Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:01:58 +0800 Subject: [PATCH 09/20] fx seq length error Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/utils/config_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/disagg/utils/config_validator.py b/tests/integration/defs/perf/disagg/utils/config_validator.py index 508e1b53acf..39b65a4e1b6 100644 --- a/tests/integration/defs/perf/disagg/utils/config_validator.py +++ b/tests/integration/defs/perf/disagg/utils/config_validator.py @@ -83,5 +83,5 @@ def _validate_ctx_and_gen_max_seq_length(extracted_config: dict) -> None: osl = extracted_config["osl"] ctx_max_seq_len = extracted_config["ctx_max_seq_len"] gen_max_seq_len = extracted_config["gen_max_seq_len"] - assert ctx_max_seq_len > (isl + osl), "config error: ctx_max_seq_len <= (isl + osl)" + assert ctx_max_seq_len > isl, "config error: ctx_max_seq_len > isl" assert gen_max_seq_len > (isl + osl), "config error: gen_max_seq_len <= (isl + osl)" From 59a5542c9cea851bbb486c556d50815b3473d0ca Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:50:37 +0800 Subject: [PATCH 10/20] modify deepseek r1 fp4 model dir name Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/envs/ENV.md | 10 ++++++++-- ..._1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml | 2 +- ...4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml | 2 +- ...4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml | 2 +- ...p4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml | 2 +- ...4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml | 2 +- ...p4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml | 2 +- ...1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml | 2 +- ..._1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml | 2 +- ...4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml | 2 +- ...p4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml | 2 +- ...4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml | 2 +- ...p4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml | 2 +- ..._8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml | 2 +- ...4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml | 2 +- ..._8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml | 2 +- ...4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml | 2 +- ...1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml | 2 +- ..._ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +- ...ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +- ...k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +- tests/integration/defs/perf/disagg/utils/common.py | 6 +++++- .../defs/perf/disagg/utils/config_loader.py | 2 +- 30 files changed, 41 insertions(+), 31 deletions(-) diff --git a/tests/integration/defs/perf/disagg/envs/ENV.md b/tests/integration/defs/perf/disagg/envs/ENV.md index 997fc151656..5d1f7320c94 100644 --- a/tests/integration/defs/perf/disagg/envs/ENV.md +++ b/tests/integration/defs/perf/disagg/envs/ENV.md @@ -15,7 +15,8 @@ export TRTLLM_WHEEL_PATH="" export GPU_TYPE="" export SLURM_PARTITION="" export SLURM_ACCOUNT="" -export MODEL_DIR="" +export MODEL_DIR="" +export DATASET_DIR="" export OUTPUT_PATH="" export PATH="" export XDG_CACHE_HOME="" @@ -70,10 +71,15 @@ SLURM account name for job billing and resource allocation. - **Example**: `your_project_account` ### `MODEL_DIR` -Base directory containing models and datasets. This path will be used to locate model checkpoints and dataset files. +Base directory containing models. This path will be used to locate model checkpoints. - **Format**: Absolute path - **Example**: `/shared/models/common` +### `DATASET_DIR` +Base directory containing dataset files. This path will be used to locate dataset files. +- **Format**: Absolute path +- **Example**: `/shared/datasets/common` + ### `OUTPUT_PATH` Directory where test results, HTML reports, and CSV files will be saved. - **Format**: Absolute path diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml index 33ee191ffd8..90a198897b6 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml index 12ac8edad06..120fc40b3c2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index ab5bd6f7196..6a4f5f5ddfe 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 7d8cb97621d..e8f1b31a411 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml index 3f9a7d6a2d1..2f9d1ad7c8a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml index f2fd2bc21db..e60204a5624 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml index 5d9d739d58f..a307a87f173 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml index f97137297b6..d44c4d51e06 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml index 6b9078ac5a4..05c6794dd63 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml index 468354c0734..10aa98c4b30 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index a970ee6de44..64dd806fa6d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 22dc90a06b3..b0b73132261 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml index a54b0dacd53..796fdbd8747 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml index ab081e78cfb..4a45880f147 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml index f4a5d3bc3a9..bc46d9fea34 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml index 93883653834..c397316b355 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 1eaf479dcca..300dd848972 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 73a27246c04..ed1b087b28f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index e95e71ca155..51e08813266 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 6055421a278..dafd262468e 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index 6b47c0fc36a..6d2003c1734 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index 1e71708f577..e1e14ad0adb 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -2,7 +2,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index 06900691bcc..9ce43d18772 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 13572a60499..0b4c8ea4d5b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index 30e61523023..040d9a14a27 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 55391a698c4..4be04844bb5 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index 62301215e96..77d42e88c8c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -1,7 +1,7 @@ metadata: model_name: deepseek-r1-fp4 precision: fp4 - model_dir_name: DeepSeek-R1-0528-FP4-V2 + model_dir_name: DeepSeek-R1-0528-FP4-v2 supported_gpus: - GB200 - GB300 diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index 9fb72fbacb9..e93a5c4075f 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -82,7 +82,11 @@ def get_trtllm_wheel_path() -> str: @staticmethod def get_model_dir() -> str: - return os.getenv("MODEL_DIR", "") + return os.getenv("MODEL_DIR", "") + + @staticmethod + def get_dataset_dir() -> str: + return os.getenv("DATASET_DIR", "") @staticmethod def get_output_path() -> str: diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index f7eeafd0cde..d36313d2883 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -500,7 +500,7 @@ def _get_dataset_file(self, config: dict) -> str: """ metadata = config.get("metadata", {}) dataset_file = metadata.get("dataset_file", "") - return os.path.join(EnvManager.get_model_dir(), dataset_file) + return os.path.join(EnvManager.get_dataset_dir(), dataset_file) def _get_script_file(self, config: dict) -> str: """Get script file by combining scripts directory with script file name. From f40652dd8b904657c5726a646377247f6683f4a1 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:13:53 +0800 Subject: [PATCH 11/20] add disagg gb300.txt Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt diff --git a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt new file mode 100644 index 00000000000..5493e48cce7 --- /dev/null +++ b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt @@ -0,0 +1,2 @@ +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX] +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL] \ No newline at end of file From 56686514e1a3492e6ce2f07ef52eaacc2c264c57 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Fri, 5 Dec 2025 09:33:37 +0800 Subject: [PATCH 12/20] add dataset dir support Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/utils/common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index e93a5c4075f..68805c9c388 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -107,6 +107,7 @@ def get_container_mount() -> str: work_dir = EnvManager.get_work_dir() script_dir = EnvManager.get_script_dir() model_dir = EnvManager.get_model_dir() + dataset_dir = EnvManager.get_dataset_dir() output_path = EnvManager.get_output_path() repo_dir = EnvManager.get_repo_dir() trtllm_wheel_path = EnvManager.get_trtllm_wheel_path() @@ -118,10 +119,12 @@ def get_container_mount() -> str: f"{output_path}:{output_path}", ] + if dataset_dir and not dataset_dir.startswith("<"): + mounts.append(f"{dataset_dir}:{dataset_dir}") # Add repo_dir if available - if repo_dir: + if repo_dir and not repo_dir.startswith("<"): mounts.append(f"{repo_dir}:{repo_dir}") - if trtllm_wheel_path: + if trtllm_wheel_path and not trtllm_wheel_path.startswith("<"): trtllm_wheel_dir = os.path.dirname(trtllm_wheel_path) mounts.append(f"{trtllm_wheel_dir}:{trtllm_wheel_dir}") return ",".join(mounts) From f318c8a482dd2b7255ab70b73aa6a53ba5f7810b Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Fri, 5 Dec 2025 09:54:33 +0800 Subject: [PATCH 13/20] fx csv file path issue Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/compare_backends.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py index c1a9ed541be..1a6272c6252 100644 --- a/tests/integration/defs/perf/disagg/compare_backends.py +++ b/tests/integration/defs/perf/disagg/compare_backends.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """Compare performance test results between different backends (UCX vs NIXL).""" - +import os import argparse import re import sys @@ -44,6 +44,10 @@ def compare_backends(csv_path, threshold=5.0, default_backend="NIXL"): Returns: DataFrame: Comparison results """ + if not os.path.exists(csv_path): + print(f"CSV file not found: {csv_path}") + sys.exit(0) + # Read CSV file df = pd.read_csv(csv_path) From c11445bdcdc2245d132d0fde6769086b51285367 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:16:38 +0800 Subject: [PATCH 14/20] fix kimi k2 host memory error Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/utils/common.py | 6 +++++- tests/integration/defs/perf/disagg/utils/config_loader.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index 68805c9c388..47bd222ea73 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -103,7 +103,7 @@ def get_install_mode() -> str: return os.getenv("INSTALL_MODE", "none") @staticmethod - def get_container_mount() -> str: + def get_container_mount(model_name: str) -> str: work_dir = EnvManager.get_work_dir() script_dir = EnvManager.get_script_dir() model_dir = EnvManager.get_model_dir() @@ -119,6 +119,10 @@ def get_container_mount() -> str: f"{output_path}:{output_path}", ] + # Kimi-K2 needs 640G of shared memory, otherwise will cause host memory OOM. + if model_name.find("kimi-k2") != -1: + mounts.append(f"tmpfs:/dev/shm:size=640G") + if dataset_dir and not dataset_dir.startswith("<"): mounts.append(f"{dataset_dir}:{dataset_dir}") # Add repo_dir if available diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index d36313d2883..fa423ac0639 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -308,7 +308,7 @@ def _load_config_file(self, yaml_path: Path, test_type: str, test_category: str) supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"]) # Override config with environment variables (in memory only, do not write back) - config_data = self._apply_env_overrides(config_data) + config_data = self._apply_env_overrides(config_data, model_name) # Generate benchmark_type from sequence configuration benchmark_type = self._generate_benchmark_type(config_data) @@ -440,7 +440,7 @@ def _get_metrics_config( logger.debug(f"Using default metrics config for {test_category}") return default_config - def _apply_env_overrides(self, config_data: dict) -> dict: + def _apply_env_overrides(self, config_data: dict, model_name: str) -> dict: """Apply environment variable overrides to configuration. Intelligently replaces empty or None values based on field path. @@ -461,7 +461,7 @@ def _apply_env_overrides(self, config_data: dict) -> dict: ("slurm", "partition"): lambda: EnvManager.get_slurm_partition(), ("slurm", "account"): lambda: EnvManager.get_slurm_account(), ("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(), - ("environment", "container_mount"): lambda: EnvManager.get_container_mount(), + ("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name), ("environment", "container_image"): lambda: EnvManager.get_container_image(), ("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(), ("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(), From 929e6fd4155c48e517edfc8f2175ab0f1a51c49d Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:26:39 +0800 Subject: [PATCH 15/20] make model name an optional parameter here Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/utils/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index 47bd222ea73..543cc7a81ed 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -103,7 +103,7 @@ def get_install_mode() -> str: return os.getenv("INSTALL_MODE", "none") @staticmethod - def get_container_mount(model_name: str) -> str: + def get_container_mount(model_name: str = "") -> str: work_dir = EnvManager.get_work_dir() script_dir = EnvManager.get_script_dir() model_dir = EnvManager.get_model_dir() From 357451fa19dff386bb518b13a5e51a6718ab14b2 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Fri, 5 Dec 2025 16:16:17 +0800 Subject: [PATCH 16/20] update dataset path Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...2B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml | 2 +- ...22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml | 2 +- ...-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +- ...fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +- ...-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 300dd848972..5de651526ec 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json accuracy: datasets: - dataset_name: gsm8k diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index ba1c599055b..42a52be3c3b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 6 - dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json accuracy: datasets: - dataset_name: gsm8k diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 60a221d9968..927fdae9885 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 8 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 8724f191f5e..8c138fc7f03 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 11 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 738c7206502..a4af6a85968 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 10 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index af30a466bea..cf7aaf0f6c1 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 13 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index c44b3f6bba8..a56926befd4 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 9 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index b7a79d74344..54854c0bf5c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 12 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index ed1b087b28f..99121fca3d2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 1 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index 51e08813266..6dcc5d71d38 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 3 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index dafd262468e..d934ef4c0af 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index 6d2003c1734..0a37ad83dbf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 2 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index e1e14ad0adb..9c045491ccf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -9,7 +9,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 7 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index 9ce43d18772..fc4e31ed35c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 14 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 0b4c8ea4d5b..83e3521db07 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 5 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index 040d9a14a27..baaa80158b7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 7 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 4be04844bb5..7e722b4424f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 4 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index 77d42e88c8c..2205179880f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 6 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 1f33b8d2450..78081a23acc 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 6 - dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml index bb7b0efc016..ce6a85757b3 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 6 - dataset_file: datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json + dataset_file: disagg_datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: From 09f19ce76e0606533925ba4fdfa1ff61824ac296 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Mon, 8 Dec 2025 09:56:34 +0800 Subject: [PATCH 17/20] fix regex expression patterns for the new output format Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- .../defs/perf/disagg/utils/config_loader.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index fa423ac0639..7ee64d410d5 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -88,9 +88,9 @@ def get_all_dataset_names(self) -> List[str]: log_file="bench.log", extractor_pattern=r""" ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Benchmark\ with\ concurrency\ (\d+)\ done """, metric_names=["SERVER_MEDIAN_TTFT", "SERVER_MEDIAN_E2EL"], @@ -99,21 +99,29 @@ def get_all_dataset_names(self) -> List[str]: log_file="bench.log", extractor_pattern=r""" ^.*?Mean\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ ITL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ ITL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ ITL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Mean\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n + (?:.*\n)*? ^.*?P99\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n - ^.*?(?:\n|.)*?$\n + (?:.*\n)*? ^.*?Benchmark\ with\ concurrency\ (\d+)\ done """, metric_names=[ From 642039104c6b1b376aadf4d89194d4caeb613e38 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:57:22 +0800 Subject: [PATCH 18/20] add kimi k2 test cases support Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/compare_backends.py | 3 ++- tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt | 2 +- tests/integration/defs/perf/disagg/testlist/wideep.txt | 2 +- tests/integration/defs/perf/disagg/utils/common.py | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py index 1a6272c6252..d6fb84e4cb2 100644 --- a/tests/integration/defs/perf/disagg/compare_backends.py +++ b/tests/integration/defs/perf/disagg/compare_backends.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """Compare performance test results between different backends (UCX vs NIXL).""" -import os + import argparse +import os import re import sys diff --git a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt index 5493e48cce7..4e0bf609f2f 100644 --- a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt +++ b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt @@ -1,2 +1,2 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX] -test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL] \ No newline at end of file +test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL] diff --git a/tests/integration/defs/perf/disagg/testlist/wideep.txt b/tests/integration/defs/perf/disagg/testlist/wideep.txt index ea1e0be57d4..28684e096f0 100644 --- a/tests/integration/defs/perf/disagg/testlist/wideep.txt +++ b/tests/integration/defs/perf/disagg/testlist/wideep.txt @@ -17,4 +17,4 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX] # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX] test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL] -test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] \ No newline at end of file +test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX] diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index 543cc7a81ed..c050fdd4682 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -83,7 +83,7 @@ def get_trtllm_wheel_path() -> str: @staticmethod def get_model_dir() -> str: return os.getenv("MODEL_DIR", "") - + @staticmethod def get_dataset_dir() -> str: return os.getenv("DATASET_DIR", "") @@ -121,7 +121,7 @@ def get_container_mount(model_name: str = "") -> str: # Kimi-K2 needs 640G of shared memory, otherwise will cause host memory OOM. if model_name.find("kimi-k2") != -1: - mounts.append(f"tmpfs:/dev/shm:size=640G") + mounts.append("tmpfs:/dev/shm:size=640G") if dataset_dir and not dataset_dir.startswith("<"): mounts.append(f"{dataset_dir}:{dataset_dir}") From ed15eb2883743b8f72e15b81890415c8c7a38d53 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Mon, 8 Dec 2025 14:42:34 +0800 Subject: [PATCH 19/20] enhance ctx max seq length Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 42a52be3c3b..95c1849c232 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -100,7 +100,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 1044 + max_seq_len: 2068 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true From 0848ca824b770bcbcc066e39529bcfcfa59ea1e3 Mon Sep 17 00:00:00 2001 From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:20:40 +0800 Subject: [PATCH 20/20] enlarge ctx and gen max_seq_len for kimi k2 Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- ...-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml index 95c1849c232..4cbcd13dd5d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml @@ -61,7 +61,7 @@ worker_config: pipeline_parallel_size: 1 max_batch_size: 1024 max_num_tokens: 1024 - max_seq_len: 2068 + max_seq_len: 5120 cuda_graph_config: enable_padding: true batch_sizes: @@ -100,7 +100,7 @@ worker_config: enable_layerwise_nvtx_marker: true max_batch_size: 8 max_num_tokens: 8448 - max_seq_len: 2068 + max_seq_len: 5120 tensor_parallel_size: 4 moe_expert_parallel_size: 4 enable_attention_dp: true