Commit ec31363

[None][fix] Layer wise benchmarks: use local models, lint (#8799)
Signed-off-by: Tailing Yuan <[email protected]>
1 parent 9112cff

File tree: 5 files changed, 36 insertions(+), 25 deletions(-)

examples/layer_wise_benchmarks/README.md (1 addition, 1 deletion)

@@ -42,7 +42,7 @@ NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --batch-size 32 --seq-len-q
 NP=4 ./mpi_launch.sh ./run_single.sh config_ctx.yaml --layer-indices 5,6,7,8
 NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --layer-indices 5,6,7,8
 
-# Scale DEP=16 MNNVL to 4 GPUs: reduce the number of experts, uses MNNVL A2A if applicable
+# Scale DEP=16 to 4 GPUs: reduce the number of experts, uses MNNVL A2A if applicable
 NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --scaled-from 16 --moe-backend WIDEEP
 
 # Scale TEP=16 to 4 GPUs: reduce the number of attention heads and experts

examples/layer_wise_benchmarks/slurm_init_containers.sh (1 addition, 0 deletions)

@@ -22,6 +22,7 @@ if [ "${CONTAINER_IMAGE:-}" == "" ]; then
     DOCKER_IMAGE=$LLM_SBSA_DOCKER_IMAGE
 else
     echo "Unsupported machine hardware name \"$MACHINE\""
+    exit 1
 fi
 
 # Change "urm.nvidia.com/sw-tensorrt-docker/..." to "urm.nvidia.com#sw-tensorrt-docker/..." to bypass credentials

tensorrt_llm/tools/layer_wise_benchmarks/deepseekv3_runner.py (5 additions, 5 deletions)

@@ -48,7 +48,7 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.world_size = mpi_world_size()
         self.rank = mpi_rank()
-        self.balance_method = None
+        self.balance_method = BalanceMethod.NotModified
         self.balance_ratio = None
 
     def apply(self, router_logits) -> (torch.Tensor, torch.Tensor):

@@ -276,7 +276,7 @@ def create_run_pack(self,
                         batch_size: int,
                         seq_len_q: int,
                         seq_len_kv_cache: int,
-                        kv_cache_manager: Optional[KVCacheManager] = None,
+                        kv_cache_manager: KVCacheManager,
                         attn_workspace: Optional[torch.Tensor] = None):
         if self.model_config.moe_backend == "TRTLLM" and os.getenv(
                 "TRTLLM_ENABLE_PDL") != "1":

@@ -292,11 +292,11 @@ def create_run_pack(self,
             max_num_requests=kv_cache_manager.max_batch_size,
             num_contexts={
                 "CTX": batch_size,
-                "GEN": 0
+                "GEN": 0,
             }[run_type],
             prompt_lens=[{
                 "CTX": seq_len_q,
-                "GEN": seq_len_kv_cache
+                "GEN": seq_len_kv_cache,
             }[run_type]] * batch_size,
             max_num_tokens=batch_size * seq_len_q,
             kv_cache_manager=kv_cache_manager,

@@ -380,7 +380,7 @@ def create_kv_cache_manager(pretrained_model_name_or_path, mapping,
         mapping=mapping,
         dtype=torch_dtype_to_binding({
             None: torch.bfloat16,
-            "FP8": torch.float8_e4m3fn
+            "FP8": torch.float8_e4m3fn,
         }[model_config.quant_config.kv_cache_quant_algo]),
         sparse_attn_config=model_config.sparse_attention_config,
     )
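
The first hunk replaces a `None` default with the explicit `BalanceMethod.NotModified` sentinel. A minimal sketch of the pattern, assuming `BalanceMethod` is a plain `Enum`; only the `NotModified` member appears in this diff, the other names below are hypothetical placeholders:

    from enum import Enum, auto


    class BalanceMethod(Enum):
        # Only NotModified is confirmed by the diff; ImbalancedRanks is a
        # hypothetical member added for illustration.
        NotModified = auto()
        ImbalancedRanks = auto()


    class RouterPatchSketch:
        def __init__(self):
            # An explicit enum default replaces None: every comparison stays
            # type-consistent and the "do nothing" case is self-documenting.
            self.balance_method = BalanceMethod.NotModified

        def apply(self, router_logits):
            if self.balance_method is BalanceMethod.NotModified:
                return router_logits  # pass logits through untouched
            raise NotImplementedError(f"unhandled {self.balance_method}")

With a sentinel member, dispatch on `self.balance_method` never has to special-case `None`, and linters can verify the comparison is exhaustive over the enum.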

tests/integration/defs/conftest.py (14 additions, 0 deletions)

@@ -626,11 +626,25 @@ def deepseek_v3_model_root(request):
     elif request.param == "DeepSeek-V3-Lite-nvfp4_moe_only":
         deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
                                               "nvfp4_moe_only")
+    elif request.param == "DeepSeek-V3.2-Exp":
+        deepseek_v3_model_root = os.path.join(models_root,
+                                              "DeepSeek-V3.2-Exp-hf")
     assert exists(
         deepseek_v3_model_root), f"{deepseek_v3_model_root} does not exist!"
     return deepseek_v3_model_root
 
 
+@pytest.fixture(scope="function")
+def deepseek_r1_model_root(request):
+    models_root = llm_models_root()
+    if request.param == "DeepSeek-R1-0528-FP4-v2":
+        deepseek_r1_model_root = os.path.join(models_root, "DeepSeek-R1",
+                                              "DeepSeek-R1-0528-FP4-v2")
+    assert exists(
+        deepseek_r1_model_root), f"{deepseek_r1_model_root} does not exist!"
+    return deepseek_r1_model_root
+
+
 @pytest.fixture(scope="session")
 def trt_performance_cache_name():
     return "performance.cache"

tests/unittest/tools/test_layer_wise_benchmarks.py (15 additions, 19 deletions)

@@ -1,32 +1,23 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import os
 
 import pytest
 import torch
+from defs.conftest import deepseek_r1_model_root  # noqa: F401
+from defs.conftest import deepseek_v3_model_root  # noqa: F401
 from defs.trt_test_alternative import check_call
 from utils.cpp_paths import llm_root  # noqa: F401
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 4,
                     reason="needs 4 GPUs to run this test")
-def test_deepseek_r1_ctx_tep(llm_root):
+@pytest.mark.parametrize("deepseek_r1_model_root", ["DeepSeek-R1-0528-FP4-v2"],
+                         indirect=True)
+def test_deepseek_r1_ctx_tep(llm_root, deepseek_r1_model_root):
     check_call([
         "./mpi_launch.sh",
         "./run_single.sh",
         "config_ctx.yaml",
+        "--model=" + deepseek_r1_model_root,
         "--no-enable-attention-dp",
         "--moe-backend=TRTLLM",
     ],

@@ -40,12 +31,14 @@ def test_deepseek_r1_ctx_tep(llm_root):
 
 @pytest.mark.skipif(torch.cuda.device_count() < 4,
                     reason="needs 4 GPUs to run this test")
-def test_deepseek_v32_ctx_dep(llm_root):
+@pytest.mark.parametrize("deepseek_v3_model_root", ["DeepSeek-V3.2-Exp"],
+                         indirect=True)
+def test_deepseek_v32_ctx_dep(llm_root, deepseek_v3_model_root):
     check_call([
         "./mpi_launch.sh",
         "./run_single.sh",
         "config_ctx.yaml",
-        "--model=deepseek-ai/DeepSeek-V3.2-Exp",
+        "--model=" + deepseek_v3_model_root,
         "--tokens-per-block=64",
         "--moe-backend=DEEPGEMM",
     ],

@@ -58,14 +51,17 @@ def test_deepseek_v32_ctx_dep(llm_root):
 
 @pytest.mark.skipif(torch.cuda.device_count() < 4,
                     reason="needs 4 GPUs to run this test")
-def test_deepseek_r1_gen_scaled_from_16_dep(llm_root):
+@pytest.mark.parametrize("deepseek_r1_model_root", ["DeepSeek-R1-0528-FP4-v2"],
+                         indirect=True)
+def test_deepseek_r1_gen_scaled_from_16_dep(llm_root, deepseek_r1_model_root):
     check_call([
         "./mpi_launch.sh",
         "./run_single.sh",
         "config_gen.yaml",
+        "--model=" + deepseek_r1_model_root,
+        "--layer-indices=5,6",
         "--scaled-from=16",
         "--moe-backend=WIDEEP",
-        "--layer-indices=5,6",
     ],
         cwd=llm_root / "examples" / "layer_wise_benchmarks",
         env={
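
Taken together, these tests now resolve model weights from the local models root instead of pulling `deepseek-ai/...` checkpoints from the Hugging Face Hub. A rough standalone equivalent of what one parametrized test ends up executing; the `LLM_MODELS_ROOT` variable and the fallback path are assumptions for illustration, not confirmed by this diff:

    import os
    import subprocess

    # Assumed convention: the models root comes from an environment variable;
    # the fallback directory here is purely illustrative.
    models_root = os.environ.get("LLM_MODELS_ROOT", "/scratch/llm-models")
    model_root = os.path.join(models_root, "DeepSeek-R1",
                              "DeepSeek-R1-0528-FP4-v2")

    subprocess.check_call(
        [
            "./mpi_launch.sh",
            "./run_single.sh",
            "config_ctx.yaml",
            "--model=" + model_root,  # local path instead of a HF repo id
            "--no-enable-attention-dp",
            "--moe-backend=TRTLLM",
        ],
        cwd="examples/layer_wise_benchmarks",  # scripts are invoked from here
    )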
