From ec7291ed22778f21ff054bc87e9e320f50796072 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Mon, 8 Dec 2025 11:10:16 +0800
Subject: [PATCH 01/20] fix conflict

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 .../integration/defs/perf/disagg/execution/subprocess_utils.py  | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py
index 7034254ee0d..9ab77714267 100644
--- a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py
+++ b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py
@@ -56,10 +56,8 @@ def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs)
         check=True,
         **kwargs,
     )
-
     # Log stderr if it exists
     if result.stderr:
         stderr_output = result.stderr.decode()
         logger.error(f"Command stderr: {stderr_output}")
-
     return result.stdout.decode()

From a2b57d77e0705d1ff7862d03ee2d2cc63458ec27 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Wed, 3 Dec 2025 16:56:48 +0800
Subject: [PATCH 02/20] add debug.txt for debugging

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/testlist/debug.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tests/integration/defs/perf/disagg/testlist/debug.txt

diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt
new file mode 100644
index 00000000000..6ed7b607ce2
--- /dev/null
+++ b/tests/integration/defs/perf/disagg/testlist/debug.txt
@@ -0,0 +1 @@
+test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL]
\ No newline at end of file

From fe09f9858aa100cb2b838b21bc6c37665fe725ff Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Wed, 3 Dec 2025 17:30:17 +0800
Subject: [PATCH 03/20] debugging

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/testlist/debug.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt
index 6ed7b607ce2..5ae7ca3045e 100644
--- a/tests/integration/defs/perf/disagg/testlist/debug.txt
+++ b/tests/integration/defs/perf/disagg/testlist/debug.txt
@@ -1 +1 @@
-test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL]
\ No newline at end of file
+test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT]
\ No newline at end of file

From f2ea0597bafe2545313bb620615772e4ba16791d Mon Sep 17 00:00:00 2001
From: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
Date: Tue, 2 Dec 2025 03:40:40 -0800
Subject: [PATCH 04/20] Add Kimi K2 Thinking NVFP4 QA yaml files

Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
---
 ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 111 ++++++++++++++++++
 ...gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 111 ++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
 create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
new file mode 100644
index 00000000000..7bd30451d31
--- /dev/null
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -0,0 +1,111 @@
+metadata:
+  model_name: kimi-k2-thinking-fp4
+  precision: fp4
+  model_dir_name: Kimi-K2-Thinking-NVFP4
+  supported_gpus:
+  - GB200
+  - GB300
+  script_file: disaggr_torch.slurm
+  benchmark_type: 1k1k
+  config_index: 6
+  dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
+slurm:
+  script_file: disaggr_torch.slurm
+  partition: <partition>
+  account: <account>
+  job_time: 00:45:00
+  job_name: unified-benchmark
+  numa_bind: true
+benchmark:
+  mode: gen_only
+  use_nv_sa_benchmark: false
+  multi_round: 8
+  benchmark_ratio: 1.0
+  streaming: true
+  concurrency_list: '16384'
+  input_length: 1024
+  output_length: 1024
+  dataset_file: <dataset_file>
+hardware:
+  gpus_per_node: 4
+  num_ctx_servers: 3
+  num_gen_servers: 1
+environment:
+  container_mount: <container_mount>
+  container_image: <container_image>
+  model_path: <model_path>
+  trtllm_repo: ''
+  build_wheel: false
+  work_dir: <full_path_to_work_dir>
+profiling:
+  nsys_on: false
+accuracy:
+  enable_accuracy_test: false
+  model: local-completions
+  tasks: gsm8k
+  model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
+worker_config:
+  gen:
+    enable_layerwise_nvtx_marker: true
+    tensor_parallel_size: 16
+    moe_expert_parallel_size: 16
+    enable_attention_dp: true
+    enable_lm_head_tp_in_adp: false
+    pipeline_parallel_size: 1
+    max_batch_size: 1024
+    max_num_tokens: 1024
+    max_seq_len: 2068
+    cuda_graph_config:
+      enable_padding: true
+      batch_sizes:
+      - 1
+      - 2
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+      - 128
+      - 256
+      - 512
+      - 768
+      - 1024
+      - 2048
+      - 1024
+    print_iter_log: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.8
+      dtype: fp8
+    moe_config:
+      backend: WIDEEP
+      use_low_precision_moe_combine: true
+      load_balancer:
+        num_slots: 384
+        layer_updates_per_iter: 1
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    stream_interval: 100
+    num_postprocess_workers: 4
+    trust_remote_code: true
+  ctx:
+    enable_layerwise_nvtx_marker: true
+    max_batch_size: 8
+    max_num_tokens: 8448
+    max_seq_len: 1044
+    tensor_parallel_size: 4
+    moe_expert_parallel_size: 4
+    enable_attention_dp: true
+    pipeline_parallel_size: 1
+    print_iter_log: true
+    cuda_graph_config: null
+    disable_overlap_scheduler: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.75
+      dtype: fp8
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    trust_remote_code: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
new file mode 100644
index 00000000000..095d1a874dc
--- /dev/null
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
@@ -0,0 +1,111 @@
+metadata:
+  model_name: kimi-k2-thinking-fp4
+  precision: fp4
+  model_dir_name: Kimi-K2-Thinking-NVFP4
+  supported_gpus:
+  - GB200
+  - GB300
+  script_file: disaggr_torch.slurm
+  benchmark_type: 8k1k
+  config_index: 6
+  dataset_file: datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json
+slurm:
+  script_file: disaggr_torch.slurm
+  partition: <partition>
+  account: <account>
+  job_time: 00:45:00
+  job_name: unified-benchmark
+  numa_bind: true
+benchmark:
+  mode: gen_only
+  use_nv_sa_benchmark: false
+  multi_round: 8
+  benchmark_ratio: 1.0
+  streaming: true
+  concurrency_list: '8192'
+  input_length: 8192
+  output_length: 1024
+  dataset_file: <dataset_file>
+hardware:
+  gpus_per_node: 4
+  num_ctx_servers: 8
+  num_gen_servers: 1
+environment:
+  container_mount: <container_mount>
+  container_image: <container_image>
+  model_path: <model_path>
+  trtllm_repo: ''
+  build_wheel: false
+  work_dir: <full_path_to_work_dir>
+profiling:
+  nsys_on: false
+accuracy:
+  enable_accuracy_test: false
+  model: local-completions
+  tasks: gsm8k
+  model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
+worker_config:
+  gen:
+    enable_layerwise_nvtx_marker: true
+    tensor_parallel_size: 32
+    moe_expert_parallel_size: 32
+    enable_attention_dp: true
+    enable_lm_head_tp_in_adp: false
+    pipeline_parallel_size: 1
+    max_batch_size: 256
+    max_num_tokens: 256
+    max_seq_len: 9256
+    cuda_graph_config:
+      enable_padding: true
+      batch_sizes:
+      - 1
+      - 2
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+      - 128
+      - 256
+      - 512
+      - 768
+      - 1024
+      - 2048
+      - 256
+    print_iter_log: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.6
+      dtype: fp8
+    moe_config:
+      backend: WIDEEP
+      use_low_precision_moe_combine: true
+      load_balancer:
+        num_slots: 416
+        layer_updates_per_iter: 1
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    stream_interval: 100
+    num_postprocess_workers: 4
+    trust_remote_code: true
+  ctx:
+    enable_layerwise_nvtx_marker: true
+    max_batch_size: 1
+    max_num_tokens: 8448
+    max_seq_len: 8232
+    tensor_parallel_size: 4
+    moe_expert_parallel_size: 4
+    enable_attention_dp: true
+    pipeline_parallel_size: 1
+    print_iter_log: true
+    cuda_graph_config: null
+    disable_overlap_scheduler: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.75
+      dtype: fp8
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    trust_remote_code: true

From 95f37a51dfb732f940f4d5ca7fac5e97a14366e2 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 10:30:18 +0800
Subject: [PATCH 05/20] add kimi k2 test for wideep both perf and accuracy

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 118 ++++++++++++++++++
 ...en1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml |   1 +
 ...gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml |   1 +
 .../defs/perf/disagg/testlist/wideep.txt      |   3 +
 4 files changed, 123 insertions(+)
 create mode 100644 tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
new file mode 100644
index 00000000000..ba1c599055b
--- /dev/null
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -0,0 +1,118 @@
+metadata:
+  model_name: kimi-k2-thinking-fp4
+  precision: fp4
+  model_dir_name: Kimi-K2-Thinking-NVFP4
+  supported_gpus:
+  - GB200
+  - GB300
+  script_file: disaggr_torch.slurm
+  benchmark_type: 1k1k
+  config_index: 6
+  dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
+  accuracy:
+    datasets:
+    - dataset_name: gsm8k
+      expected_value: 0.9454
+      threshold_type: hypothesis_test
+      filter_type: flexible-extract
+slurm:
+  script_file: disaggr_torch.slurm
+  partition: <partition>
+  account: <account>
+  job_time: 00:45:00
+  job_name: unified-benchmark
+  extra_args: "--gres=gpu:4"
+  numa_bind: true
+benchmark:
+  mode: gen_only
+  use_nv_sa_benchmark: false
+  multi_round: 8
+  benchmark_ratio: 1.0
+  streaming: true
+  concurrency_list: '16384'
+  input_length: 1024
+  output_length: 1024
+  dataset_file: <dataset_file>
+hardware:
+  gpus_per_node: 4
+  num_ctx_servers: 3
+  num_gen_servers: 1
+environment:
+  container_mount: <container_mount>
+  container_image: <container_image>
+  model_path: <model_path>
+  trtllm_repo: ''
+  build_wheel: false
+  work_dir: <full_path_to_work_dir>
+profiling:
+  nsys_on: false
+accuracy:
+  enable_accuracy_test: true
+  model: local-completions
+  tasks: gsm8k
+  model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
+worker_config:
+  gen:
+    enable_layerwise_nvtx_marker: true
+    tensor_parallel_size: 16
+    moe_expert_parallel_size: 16
+    enable_attention_dp: true
+    enable_lm_head_tp_in_adp: false
+    pipeline_parallel_size: 1
+    max_batch_size: 1024
+    max_num_tokens: 1024
+    max_seq_len: 2068
+    cuda_graph_config:
+      enable_padding: true
+      batch_sizes:
+      - 1
+      - 2
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+      - 128
+      - 256
+      - 512
+      - 768
+      - 1024
+      - 2048
+      - 1024
+    print_iter_log: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.8
+      dtype: fp8
+    moe_config:
+      backend: WIDEEP
+      use_low_precision_moe_combine: true
+      load_balancer:
+        num_slots: 384
+        layer_updates_per_iter: 1
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    stream_interval: 100
+    num_postprocess_workers: 4
+    trust_remote_code: true
+  ctx:
+    enable_layerwise_nvtx_marker: true
+    max_batch_size: 8
+    max_num_tokens: 8448
+    max_seq_len: 1044
+    tensor_parallel_size: 4
+    moe_expert_parallel_size: 4
+    enable_attention_dp: true
+    pipeline_parallel_size: 1
+    print_iter_log: true
+    cuda_graph_config: null
+    disable_overlap_scheduler: true
+    kv_cache_config:
+      enable_block_reuse: false
+      free_gpu_memory_fraction: 0.75
+      dtype: fp8
+    cache_transceiver_config:
+      max_tokens_in_buffer: 8448
+      backend: UCX
+    trust_remote_code: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 7bd30451d31..1f33b8d2450 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -15,6 +15,7 @@ slurm:
   account: <account>
   job_time: 00:45:00
   job_name: unified-benchmark
+  extra_args: "--gres=gpu:4"
   numa_bind: true
 benchmark:
   mode: gen_only
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
index 095d1a874dc..bb7b0efc016 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
@@ -15,6 +15,7 @@ slurm:
   account: <account>
   job_time: 00:45:00
   job_name: unified-benchmark
+  extra_args: "--gres=gpu:4"
   numa_bind: true
 benchmark:
   mode: gen_only
diff --git a/tests/integration/defs/perf/disagg/testlist/wideep.txt b/tests/integration/defs/perf/disagg/testlist/wideep.txt
index 55e7bd47219..ea1e0be57d4 100644
--- a/tests/integration/defs/perf/disagg/testlist/wideep.txt
+++ b/tests/integration/defs/perf/disagg/testlist/wideep.txt
@@ -8,6 +8,8 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_
 test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL]
 test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL]
 test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT]
+test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX]
+test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX]
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL]
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL]
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL]
@@ -15,3 +17,4 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_deepseek-r1-fp4_
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX]
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX]
 test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL]
+test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX]
\ No newline at end of file

From d5538ed5d0c9f9a45c6dc7dec536fbe8ac1a37a3 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 10:34:23 +0800
Subject: [PATCH 06/20] remove debug txt file

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/testlist/debug.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 tests/integration/defs/perf/disagg/testlist/debug.txt

diff --git a/tests/integration/defs/perf/disagg/testlist/debug.txt b/tests/integration/defs/perf/disagg/testlist/debug.txt
deleted file mode 100644
index 5ae7ca3045e..00000000000
--- a/tests/integration/defs/perf/disagg/testlist/debug.txt
+++ /dev/null
@@ -1 +0,0 @@
-test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT]
\ No newline at end of file

From 72e5ba7d92d55a8340c344f9bedb76f6366d99f3 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 14:56:49 +0800
Subject: [PATCH 07/20] fix ctx max seq length

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index ba1c599055b..302ef4a0562 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -100,7 +100,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 1044
+    max_seq_len: 2068
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 1f33b8d2450..1b9b1bc6c8d 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -94,7 +94,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 1044
+    max_seq_len: 2068
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
index bb7b0efc016..63f78503d97 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
@@ -94,7 +94,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 1
     max_num_tokens: 8448
-    max_seq_len: 8232
+    max_seq_len: 9256
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true

From 538b9eae8ee159e42193150bad94c672384fbde3 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:00:56 +0800
Subject: [PATCH 08/20] fix: revert back to the original ctx seq length

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 302ef4a0562..ba1c599055b 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -100,7 +100,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 2068
+    max_seq_len: 1044
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 1b9b1bc6c8d..1f33b8d2450 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -94,7 +94,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 2068
+    max_seq_len: 1044
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
index 63f78503d97..bb7b0efc016 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
@@ -94,7 +94,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 1
     max_num_tokens: 8448
-    max_seq_len: 9256
+    max_seq_len: 8232
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true

From 6967e574ea0a273eaf943cb47d57e0d8c44b80af Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:01:58 +0800
Subject: [PATCH 09/20] fix seq length error

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/utils/config_validator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/disagg/utils/config_validator.py b/tests/integration/defs/perf/disagg/utils/config_validator.py
index 508e1b53acf..39b65a4e1b6 100644
--- a/tests/integration/defs/perf/disagg/utils/config_validator.py
+++ b/tests/integration/defs/perf/disagg/utils/config_validator.py
@@ -83,5 +83,5 @@ def _validate_ctx_and_gen_max_seq_length(extracted_config: dict) -> None:
         osl = extracted_config["osl"]
         ctx_max_seq_len = extracted_config["ctx_max_seq_len"]
         gen_max_seq_len = extracted_config["gen_max_seq_len"]
-        assert ctx_max_seq_len > (isl + osl), "config error: ctx_max_seq_len <= (isl + osl)"
+        assert ctx_max_seq_len > isl, "config error: ctx_max_seq_len <= isl"
         assert gen_max_seq_len > (isl + osl), "config error: gen_max_seq_len <= (isl + osl)"

From 59a5542c9cea851bbb486c556d50815b3473d0ca Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 16:50:37 +0800
Subject: [PATCH 10/20] modify deepseek r1 fp4 model dir name

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/envs/ENV.md         | 10 ++++++++--
 ..._1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml |  2 +-
 ...4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml |  2 +-
 ...4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml |  2 +-
 ...p4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml |  2 +-
 ...4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml |  2 +-
 ...p4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml |  2 +-
 ...1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml |  2 +-
 ..._1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml |  2 +-
 ...4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml |  2 +-
 ...p4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml |  2 +-
 ...4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml |  2 +-
 ...p4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml |  2 +-
 ..._8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml |  2 +-
 ...4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml |  2 +-
 ..._8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml |  2 +-
 ...4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml |  2 +-
 ...1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml |  2 +-
 ...k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml |  2 +-
 ...1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml |  2 +-
 ...1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml |  2 +-
 ...k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml |  2 +-
 ..._ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml |  2 +-
 ...ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml |  2 +-
 ...k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml |  2 +-
 ...8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml |  2 +-
 ...k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml |  2 +-
 ...8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml |  2 +-
 tests/integration/defs/perf/disagg/utils/common.py     |  6 +++++-
 .../defs/perf/disagg/utils/config_loader.py            |  2 +-
 30 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/envs/ENV.md b/tests/integration/defs/perf/disagg/envs/ENV.md
index 997fc151656..5d1f7320c94 100644
--- a/tests/integration/defs/perf/disagg/envs/ENV.md
+++ b/tests/integration/defs/perf/disagg/envs/ENV.md
@@ -15,7 +15,8 @@ export TRTLLM_WHEEL_PATH="<your_tensorrt_llm_wheel_path>"
 export GPU_TYPE="<your_gpu_type>"
 export SLURM_PARTITION="<your_slurm_cluster_partition>"
 export SLURM_ACCOUNT="<your_slurm_cluster_account>"
-export MODEL_DIR="<your_model_and_dataset_path>"
+export MODEL_DIR="<your_model_path>"
+export DATASET_DIR="<your_dataset_path>"
 export OUTPUT_PATH="<your_html_and_csv_output_path>"
 export PATH="<please_add_poetry_binary_to_your_path>"
 export XDG_CACHE_HOME="<your_xdg_cache_home>"
@@ -70,10 +71,15 @@ SLURM account name for job billing and resource allocation.
 - **Example**: `your_project_account`
 
 ### `MODEL_DIR`
-Base directory containing models and datasets. This path will be used to locate model checkpoints and dataset files.
+Base directory containing models. This path will be used to locate model checkpoints.
 - **Format**: Absolute path
 - **Example**: `/shared/models/common`
 
+### `DATASET_DIR`
+Base directory containing dataset files. This path will be used to locate dataset files.
+- **Format**: Absolute path
+- **Example**: `/shared/datasets/common`
+
 ### `OUTPUT_PATH`
 Directory where test results, HTML reports, and CSV files will be saved.
 - **Format**: Absolute path
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml
index 33ee191ffd8..90a198897b6 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml
index 12ac8edad06..120fc40b3c2 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
index ab5bd6f7196..6a4f5f5ddfe 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
index 7d8cb97621d..e8f1b31a411 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml
index 3f9a7d6a2d1..2f9d1ad7c8a 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml
index f2fd2bc21db..e60204a5624 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml
index 5d9d739d58f..a307a87f173 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml
index f97137297b6..d44c4d51e06 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml
index 6b9078ac5a4..05c6794dd63 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml
index 468354c0734..10aa98c4b30 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
index a970ee6de44..64dd806fa6d 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
index 22dc90a06b3..b0b73132261 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml
index a54b0dacd53..796fdbd8747 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml
index ab081e78cfb..4a45880f147 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml
index f4a5d3bc3a9..bc46d9fea34 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml
index 93883653834..c397316b355 100644
--- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
index 1eaf479dcca..300dd848972 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
index 73a27246c04..ed1b087b28f 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
index e95e71ca155..51e08813266 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
index 6055421a278..dafd262468e 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
index 6b47c0fc36a..6d2003c1734 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
index 1e71708f577..e1e14ad0adb 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
@@ -2,7 +2,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
index 06900691bcc..9ce43d18772 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
index 13572a60499..0b4c8ea4d5b 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
index 30e61523023..040d9a14a27 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
index 55391a698c4..4be04844bb5 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
index 62301215e96..77d42e88c8c 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
@@ -1,7 +1,7 @@
 metadata:
   model_name: deepseek-r1-fp4
   precision: fp4
-  model_dir_name: DeepSeek-R1-0528-FP4-V2
+  model_dir_name: DeepSeek-R1-0528-FP4-v2
   supported_gpus:
   - GB200
   - GB300
diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index 9fb72fbacb9..e93a5c4075f 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -82,7 +82,11 @@ def get_trtllm_wheel_path() -> str:
 
     @staticmethod
     def get_model_dir() -> str:
-        return os.getenv("MODEL_DIR", "<Your model and dataset directory>")
+        return os.getenv("MODEL_DIR", "<Your model directory>")
+    
+    @staticmethod
+    def get_dataset_dir() -> str:
+        return os.getenv("DATASET_DIR", "<Your dataset directory>")
 
     @staticmethod
     def get_output_path() -> str:
diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py
index f7eeafd0cde..d36313d2883 100644
--- a/tests/integration/defs/perf/disagg/utils/config_loader.py
+++ b/tests/integration/defs/perf/disagg/utils/config_loader.py
@@ -500,7 +500,7 @@ def _get_dataset_file(self, config: dict) -> str:
         """
         metadata = config.get("metadata", {})
         dataset_file = metadata.get("dataset_file", "")
-        return os.path.join(EnvManager.get_model_dir(), dataset_file)
+        return os.path.join(EnvManager.get_dataset_dir(), dataset_file)
 
     def _get_script_file(self, config: dict) -> str:
         """Get script file by combining scripts directory with script file name.

From f40652dd8b904657c5726a646377247f6683f4a1 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Thu, 4 Dec 2025 17:13:53 +0800
Subject: [PATCH 11/20] add disagg gb300.txt

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt

diff --git a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt
new file mode 100644
index 00000000000..5493e48cce7
--- /dev/null
+++ b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt
@@ -0,0 +1,2 @@
+test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX]
+test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL]
\ No newline at end of file

From 56686514e1a3492e6ce2f07ef52eaacc2c264c57 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Fri, 5 Dec 2025 09:33:37 +0800
Subject: [PATCH 12/20] add dataset dir support

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/utils/common.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index e93a5c4075f..68805c9c388 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -107,6 +107,7 @@ def get_container_mount() -> str:
         work_dir = EnvManager.get_work_dir()
         script_dir = EnvManager.get_script_dir()
         model_dir = EnvManager.get_model_dir()
+        dataset_dir = EnvManager.get_dataset_dir()
         output_path = EnvManager.get_output_path()
         repo_dir = EnvManager.get_repo_dir()
         trtllm_wheel_path = EnvManager.get_trtllm_wheel_path()
@@ -118,10 +119,12 @@ def get_container_mount() -> str:
             f"{output_path}:{output_path}",
         ]
 
+        if dataset_dir and not dataset_dir.startswith("<"):
+            mounts.append(f"{dataset_dir}:{dataset_dir}")
         # Add repo_dir if available
-        if repo_dir:
+        if repo_dir and not repo_dir.startswith("<"):
             mounts.append(f"{repo_dir}:{repo_dir}")
-        if trtllm_wheel_path:
+        if trtllm_wheel_path and not trtllm_wheel_path.startswith("<"):
             trtllm_wheel_dir = os.path.dirname(trtllm_wheel_path)
             mounts.append(f"{trtllm_wheel_dir}:{trtllm_wheel_dir}")
         return ",".join(mounts)

From f318c8a482dd2b7255ab70b73aa6a53ba5f7810b Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Fri, 5 Dec 2025 09:54:33 +0800
Subject: [PATCH 13/20] fix csv file path issue

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/compare_backends.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py
index c1a9ed541be..1a6272c6252 100644
--- a/tests/integration/defs/perf/disagg/compare_backends.py
+++ b/tests/integration/defs/perf/disagg/compare_backends.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """Compare performance test results between different backends (UCX vs NIXL)."""
-
+import os
 import argparse
 import re
 import sys
@@ -44,6 +44,10 @@ def compare_backends(csv_path, threshold=5.0, default_backend="NIXL"):
     Returns:
         DataFrame: Comparison results
     """
+    if not os.path.exists(csv_path):
+        print(f"CSV file not found: {csv_path}")
+        sys.exit(0)
+
     # Read CSV file
     df = pd.read_csv(csv_path)
 

From c11445bdcdc2245d132d0fde6769086b51285367 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Fri, 5 Dec 2025 15:16:38 +0800
Subject: [PATCH 14/20] fix kimi k2 host memory error

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/utils/common.py        | 6 +++++-
 tests/integration/defs/perf/disagg/utils/config_loader.py | 6 +++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index 68805c9c388..47bd222ea73 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -103,7 +103,7 @@ def get_install_mode() -> str:
         return os.getenv("INSTALL_MODE", "none")
 
     @staticmethod
-    def get_container_mount() -> str:
+    def get_container_mount(model_name: str) -> str:
         work_dir = EnvManager.get_work_dir()
         script_dir = EnvManager.get_script_dir()
         model_dir = EnvManager.get_model_dir()
@@ -119,6 +119,10 @@ def get_container_mount() -> str:
             f"{output_path}:{output_path}",
         ]
 
+        # Kimi-K2 needs 640G of shared memory, otherwise will cause host memory OOM.
+        if model_name.find("kimi-k2") != -1:
+            mounts.append(f"tmpfs:/dev/shm:size=640G")
+
         if dataset_dir and not dataset_dir.startswith("<"):
             mounts.append(f"{dataset_dir}:{dataset_dir}")
         # Add repo_dir if available
diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py
index d36313d2883..fa423ac0639 100644
--- a/tests/integration/defs/perf/disagg/utils/config_loader.py
+++ b/tests/integration/defs/perf/disagg/utils/config_loader.py
@@ -308,7 +308,7 @@ def _load_config_file(self, yaml_path: Path, test_type: str, test_category: str)
         supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"])
 
         # Override config with environment variables (in memory only, do not write back)
-        config_data = self._apply_env_overrides(config_data)
+        config_data = self._apply_env_overrides(config_data, model_name)
 
         # Generate benchmark_type from sequence configuration
         benchmark_type = self._generate_benchmark_type(config_data)
@@ -440,7 +440,7 @@ def _get_metrics_config(
             logger.debug(f"Using default metrics config for {test_category}")
             return default_config
 
-    def _apply_env_overrides(self, config_data: dict) -> dict:
+    def _apply_env_overrides(self, config_data: dict, model_name: str) -> dict:
         """Apply environment variable overrides to configuration.
 
         Intelligently replaces empty or None values based on field path.
@@ -461,7 +461,7 @@ def _apply_env_overrides(self, config_data: dict) -> dict:
             ("slurm", "partition"): lambda: EnvManager.get_slurm_partition(),
             ("slurm", "account"): lambda: EnvManager.get_slurm_account(),
             ("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(),
-            ("environment", "container_mount"): lambda: EnvManager.get_container_mount(),
+            ("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name),
             ("environment", "container_image"): lambda: EnvManager.get_container_image(),
             ("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(),
             ("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(),

From 929e6fd4155c48e517edfc8f2175ab0f1a51c49d Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Fri, 5 Dec 2025 15:26:39 +0800
Subject: [PATCH 15/20] make model name an optional parameter here

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/utils/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index 47bd222ea73..543cc7a81ed 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -103,7 +103,7 @@ def get_install_mode() -> str:
         return os.getenv("INSTALL_MODE", "none")
 
     @staticmethod
-    def get_container_mount(model_name: str) -> str:
+    def get_container_mount(model_name: str = "") -> str:
         work_dir = EnvManager.get_work_dir()
         script_dir = EnvManager.get_script_dir()
         model_dir = EnvManager.get_model_dir()

From 357451fa19dff386bb518b13a5e51a6718ab14b2 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Fri, 5 Dec 2025 16:16:17 +0800
Subject: [PATCH 16/20] update dataset path

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +-
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml | 2 +-
 ...A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml | 2 +-
 ...22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +-
 ...A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +-
 ...2B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml | 2 +-
 ...22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml | 2 +-
 ...-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +-
 ...k-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml | 2 +-
 ...r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +-
 ...-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml | 2 +-
 ...-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +-
 ...fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml | 2 +-
 ...-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml | 2 +-
 ...k-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml | 2 +-
 ...-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +-
 ...k-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +-
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 ...ing-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml | 2 +-
 20 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
index 300dd848972..5de651526ec 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 0
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
   accuracy:
     datasets:
     - dataset_name: gsm8k
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index ba1c599055b..42a52be3c3b 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 6
-  dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
   accuracy:
     datasets:
     - dataset_name: gsm8k
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml
index 60a221d9968..927fdae9885 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 8
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml
index 8724f191f5e..8c138fc7f03 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 11
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
index 738c7206502..a4af6a85968 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 10
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
index af30a466bea..cf7aaf0f6c1 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 13
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml
index c44b3f6bba8..a56926befd4 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 9
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml
index b7a79d74344..54854c0bf5c 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 12
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
index ed1b087b28f..99121fca3d2 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 1
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
index 51e08813266..6dcc5d71d38 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 3
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
index dafd262468e..d934ef4c0af 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 0
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
index 6d2003c1734..0a37ad83dbf 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 2
-  dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
index e1e14ad0adb..9c045491ccf 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml
@@ -9,7 +9,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 7
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
index 9ce43d18772..fc4e31ed35c 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 14
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
index 0b4c8ea4d5b..83e3521db07 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 5
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
index 040d9a14a27..baaa80158b7 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 7
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
index 4be04844bb5..7e722b4424f 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 4
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
index 77d42e88c8c..2205179880f 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 6
-  dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 1f33b8d2450..78081a23acc 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 1k1k
   config_index: 6
-  dataset_file: datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/kimi-k2-1024-1024-100000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>
diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
index bb7b0efc016..ce6a85757b3 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-UCX.yaml
@@ -8,7 +8,7 @@ metadata:
   script_file: disaggr_torch.slurm
   benchmark_type: 8k1k
   config_index: 6
-  dataset_file: datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json
+  dataset_file: disagg_datasets/kimi-k2-8192-1024-20000-ratio-1_for_serve.json
 slurm:
   script_file: disaggr_torch.slurm
   partition: <partition>

From 09f19ce76e0606533925ba4fdfa1ff61824ac296 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Mon, 8 Dec 2025 09:56:34 +0800
Subject: [PATCH 17/20] fix regex patterns for the new output format

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 .../defs/perf/disagg/utils/config_loader.py   | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py
index fa423ac0639..7ee64d410d5 100644
--- a/tests/integration/defs/perf/disagg/utils/config_loader.py
+++ b/tests/integration/defs/perf/disagg/utils/config_loader.py
@@ -88,9 +88,9 @@ def get_all_dataset_names(self) -> List[str]:
         log_file="bench.log",
         extractor_pattern=r"""
             ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Benchmark\ with\ concurrency\ (\d+)\ done
         """,
         metric_names=["SERVER_MEDIAN_TTFT", "SERVER_MEDIAN_E2EL"],
@@ -99,21 +99,29 @@ def get_all_dataset_names(self) -> List[str]:
         log_file="bench.log",
         extractor_pattern=r"""
             ^.*?Mean\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?P99\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Mean\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?Median\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?P99\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Mean\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?Median\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?P99\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Mean\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
+            (?:.*\n)*?
             ^.*?P99\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
-            ^.*?(?:\n|.)*?$\n
+            (?:.*\n)*?
             ^.*?Benchmark\ with\ concurrency\ (\d+)\ done
         """,
         metric_names=[

From 642039104c6b1b376aadf4d89194d4caeb613e38 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Mon, 8 Dec 2025 10:57:22 +0800
Subject: [PATCH 18/20] add kimi k2 test cases support

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 tests/integration/defs/perf/disagg/compare_backends.py       | 3 ++-
 tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt | 2 +-
 tests/integration/defs/perf/disagg/testlist/wideep.txt       | 2 +-
 tests/integration/defs/perf/disagg/utils/common.py           | 4 ++--
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py
index 1a6272c6252..d6fb84e4cb2 100644
--- a/tests/integration/defs/perf/disagg/compare_backends.py
+++ b/tests/integration/defs/perf/disagg/compare_backends.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """Compare performance test results between different backends (UCX vs NIXL)."""
-import os
+
 import argparse
+import os
 import re
 import sys
 
diff --git a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt
index 5493e48cce7..4e0bf609f2f 100644
--- a/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt
+++ b/tests/integration/defs/perf/disagg/testlist/disagg_gb300.txt
@@ -1,2 +1,2 @@
 test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX]
-test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL]
\ No newline at end of file
+test_disagg.py::TestDisaggBenchmark::test_benchmark[disagg_perf_deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL]
diff --git a/tests/integration/defs/perf/disagg/testlist/wideep.txt b/tests/integration/defs/perf/disagg/testlist/wideep.txt
index ea1e0be57d4..28684e096f0 100644
--- a/tests/integration/defs/perf/disagg/testlist/wideep.txt
+++ b/tests/integration/defs/perf/disagg/testlist/wideep.txt
@@ -17,4 +17,4 @@ test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_kimi-k2-thinking
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX]
 # test_disagg.py::TestDisaggBenchmark::test_benchmark[wideep_perf_Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX]
 test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL]
-test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX]
\ No newline at end of file
+test_disagg.py::TestDisaggBenchmark::test_accuracy[wideep_accuracy_kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX]
diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index 543cc7a81ed..c050fdd4682 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -83,7 +83,7 @@ def get_trtllm_wheel_path() -> str:
     @staticmethod
     def get_model_dir() -> str:
         return os.getenv("MODEL_DIR", "<Your model directory>")
-    
+
     @staticmethod
     def get_dataset_dir() -> str:
         return os.getenv("DATASET_DIR", "<Your dataset directory>")
@@ -121,7 +121,7 @@ def get_container_mount(model_name: str = "") -> str:
 
         # Kimi-K2 needs 640G of shared memory, otherwise will cause host memory OOM.
         if model_name.find("kimi-k2") != -1:
-            mounts.append(f"tmpfs:/dev/shm:size=640G")
+            mounts.append("tmpfs:/dev/shm:size=640G")
 
         if dataset_dir and not dataset_dir.startswith("<"):
             mounts.append(f"{dataset_dir}:{dataset_dir}")

From ed15eb2883743b8f72e15b81890415c8c7a38d53 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Mon, 8 Dec 2025 14:42:34 +0800
Subject: [PATCH 19/20] increase ctx max_seq_len

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...ng-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 42a52be3c3b..95c1849c232 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -100,7 +100,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 1044
+    max_seq_len: 2068
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true

From 0848ca824b770bcbcc066e39529bcfcfa59ea1e3 Mon Sep 17 00:00:00 2001
From: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Date: Mon, 8 Dec 2025 16:20:40 +0800
Subject: [PATCH 20/20] enlarge ctx and gen max_seq_len for kimi k2

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
---
 ...-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
index 95c1849c232..4cbcd13dd5d 100644
--- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
+++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-UCX.yaml
@@ -61,7 +61,7 @@ worker_config:
     pipeline_parallel_size: 1
     max_batch_size: 1024
     max_num_tokens: 1024
-    max_seq_len: 2068
+    max_seq_len: 5120
     cuda_graph_config:
       enable_padding: true
       batch_sizes:
@@ -100,7 +100,7 @@ worker_config:
     enable_layerwise_nvtx_marker: true
     max_batch_size: 8
     max_num_tokens: 8448
-    max_seq_len: 2068
+    max_seq_len: 5120
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true