From 1a3b4171a4d0ae0e8cba12ddeda67b5d4cf5580c Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sat, 2 May 2026 22:01:18 -0700 Subject: [PATCH 1/4] update dsv4 trt fused mhc image --- .github/configs/nvidia-master.yaml | 2 +- benchmarks/single_node/dsv4_fp4_b300_trt.sh | 7 ++----- perf-changelog.yaml | 7 +++++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 97665ca53..71c420f04 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2707,7 +2707,7 @@ dsv4-fp4-b300-vllm: - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } dsv4-fp4-b300-trt: - image: ghcr.io#semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-4999884 + image: ghcr.io#semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b300 diff --git a/benchmarks/single_node/dsv4_fp4_b300_trt.sh b/benchmarks/single_node/dsv4_fp4_b300_trt.sh index 1356ecbac..754846912 100644 --- a/benchmarks/single_node/dsv4_fp4_b300_trt.sh +++ b/benchmarks/single_node/dsv4_fp4_b300_trt.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # DeepSeek-V4-Pro single-node TRTLLM recipe for B300. The configured image -# already contains NVIDIA/TensorRT-LLM@feat/deepseek_v4; do not build TRTLLM at +# already contains a TensorRT-LLM DeepSeek-V4 build; do not build TRTLLM at # runtime from this benchmark path. source "$(dirname "$0")/../benchmark_lib.sh" @@ -101,10 +101,7 @@ if [ "${EVAL_ONLY}" = "true" ]; then MAX_NUM_TOKENS="$EVAL_MAX_MODEL_LEN" fi -# DeepSeek-V4-Pro has hidden size 7168. The current TRTLLM fused-HC MHC -# path corrupts eval generations for this shape; keep eval servers on the -# unfused path until the fused kernel is guarded or supports 7168. -export TRTLLM_MHC_ENABLE_FUSED_HC=0 +export TRTLLM_MHC_ENABLE_FUSED_HC="${TRTLLM_MHC_ENABLE_FUSED_HC:-1}" echo "TRTLLM_MHC_ENABLE_FUSED_HC: $TRTLLM_MHC_ENABLE_FUSED_HC" start_gpu_monitor --output "$PWD/gpu_metrics.csv" diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 4098a580a..df1a22cff 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2148,3 +2148,10 @@ - "Disable TRTLLM fused MHC hyper-connection for eval servers via TRTLLM_MHC_ENABLE_FUSED_HC=0 because the current fused kernel corrupts DeepSeek-V4-Pro hidden size 7168 generations" - "Keep this as eval-only PR validation until the TensorRT-LLM fused MHC kernel is guarded or supports hidden size 7168" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1233 + +- config-keys: + - dsv4-fp4-b300-trt + description: + - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e" + - "Enable TRTLLM fused MHC by default now that the image includes the hidden-size 7168 fused-HC fix" + pr-link: TBD From 1ce0e2294e2524dc87add60a4a0320ba3f6629f6 Mon Sep 17 00:00:00 2001 From: Bryan Shan <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 2 May 2026 22:08:28 -0700 Subject: [PATCH 2/4] Update perf-changelog.yaml --- perf-changelog.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index df1a22cff..a97ec6533 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2154,4 +2154,5 @@ description: - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e" - "Enable TRTLLM fused MHC by default now that the image includes the hidden-size 7168 fused-HC fix" - pr-link: TBD + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1270 + evals-only: true From 660a3b787d84a69894f9657c23ab56aa3a437817 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sat, 2 May 2026 22:37:12 -0700 Subject: [PATCH 3/4] all, again --- perf-changelog.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a97ec6533..5d02725da 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2155,4 +2155,3 @@ - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e" - "Enable TRTLLM fused MHC by default now that the image includes the hidden-size 7168 fused-HC fix" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1270 - evals-only: true From 8804f7bff0f2461ba6b210ec2ee87f58d60ddfac Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Tue, 5 May 2026 13:28:36 -0700 Subject: [PATCH 4/4] merged PR --- .github/configs/nvidia-master.yaml | 2 +- perf-changelog.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 71c420f04..9e0561607 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2707,7 +2707,7 @@ dsv4-fp4-b300-vllm: - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } dsv4-fp4-b300-trt: - image: ghcr.io#semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e + image: ghcr.io#semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-9aa3715 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b300 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5d02725da..8fb77e84c 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2152,6 +2152,6 @@ - config-keys: - dsv4-fp4-b300-trt description: - - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e" - - "Enable TRTLLM fused MHC by default now that the image includes the hidden-size 7168 fused-HC fix" + - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-9aa3715" + - "Enable TRTLLM fused MHC by default with the DeepSeek-V4 feature image" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1270