Skip to content

Commit a7e30ba

Browse files
committed
add test for mtp
Signed-off-by: Chuang Zhu <[email protected]>
1 parent 2c59976 commit a7e30ba

File tree

7 files changed

+71
-8
lines changed

7 files changed

+71
-8
lines changed

cpp/tensorrt_llm/executor/serialization.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -859,7 +859,7 @@ size_t Serialization::serializedSize(Tensor const& tensor)
859859
auto memoryType = tensor.getMemoryType();
860860
totalSize += sizeof(memoryType); // memory type
861861

862-
totalSize += sizeof(size_t); // Size in bytes
862+
totalSize += sizeof(size_t); // Size in bytes
863863
totalSize += tensor.getSizeInBytes();
864864
return totalSize;
865865
}

cpp/tests/unit_tests/batch_manager/cacheTransBufferTest.cpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -346,8 +346,9 @@ TEST_F(CacheTransBufferTest, TestForNullOptAndNoneTransSize)
346346
auto bufferManager = tensorrt_llm::runtime::BufferManager{std::make_shared<CudaStream>()};
347347
auto targetNum = 2;
348348
auto targetSize = 1024;
349+
std::vector<size_t> targetSizeVec = std::vector<size_t>(targetNum, targetSize);
349350
auto [sendBuffers, bufferCoverTargetNum, onlyUseDynamicBuffer]
350-
= mTransBufferManager->getOrAllocateSendBuffers(bufferId3, targetNum, targetSize, bufferManager);
351+
= mTransBufferManager->getOrAllocateSendBuffers(bufferId3, targetNum, targetSizeVec, bufferManager);
351352
EXPECT_EQ(sendBuffers.size(), targetNum);
352353
EXPECT_EQ(bufferCoverTargetNum, targetNum);
353354
EXPECT_EQ(onlyUseDynamicBuffer, true);
@@ -393,8 +394,9 @@ TEST_F(CacheTransBufferTest, TestForNullOptAndDefaultTransSize)
393394
auto bufferManager = tensorrt_llm::runtime::BufferManager{std::make_shared<CudaStream>()};
394395
auto targetNum = 2;
395396
auto targetSize = 1024;
397+
std::vector<size_t> targetSizeVec = std::vector<size_t>(targetNum, targetSize);
396398
auto [sendBuffers, bufferCoverTargetNum, onlyUseDynamicBuffer]
397-
= mTransBufferManager->getOrAllocateSendBuffers(bufferId3, targetNum, targetSize, bufferManager);
399+
= mTransBufferManager->getOrAllocateSendBuffers(bufferId3, targetNum, targetSizeVec, bufferManager);
398400
EXPECT_EQ(sendBuffers.size(), targetNum);
399401
EXPECT_EQ(bufferCoverTargetNum, targetNum);
400402
EXPECT_EQ(onlyUseDynamicBuffer, false);
@@ -407,8 +409,9 @@ TEST_F(CacheTransBufferTest, TestForNullOptAndDefaultTransSize)
407409
auto bufferId4 = mTransBufferManager->assignBufferIndexForSend();
408410
EXPECT_TRUE(bufferId4.has_value());
409411
EXPECT_EQ(bufferId4.value(), 0);
412+
targetSizeVec = std::vector<size_t>(targetNum, targetSize);
410413
auto [sendBuffers2, bufferCoverTargetNum2, onlyUseDynamicBuffer2]
411-
= mTransBufferManager->getOrAllocateSendBuffers(bufferId4, targetNum, targetSize, bufferManager);
414+
= mTransBufferManager->getOrAllocateSendBuffers(bufferId4, targetNum, targetSizeVec, bufferManager);
412415
EXPECT_EQ(sendBuffers2.size(), targetNum);
413416
EXPECT_EQ(bufferCoverTargetNum2, targetNum / 2);
414417
EXPECT_EQ(onlyUseDynamicBuffer2, false);
@@ -418,8 +421,9 @@ TEST_F(CacheTransBufferTest, TestForNullOptAndDefaultTransSize)
418421
auto bufferId5 = mTransBufferManager->assignBufferIndexForSend();
419422
EXPECT_TRUE(bufferId5.has_value());
420423
EXPECT_EQ(bufferId5.value(), 0);
424+
targetSizeVec = std::vector<size_t>(targetNum, targetSize);
421425
auto [sendBuffers3, bufferCoverTargetNum3, onlyUseDynamicBuffer3]
422-
= mTransBufferManager->getOrAllocateSendBuffers(bufferId5, targetNum, targetSize, bufferManager);
426+
= mTransBufferManager->getOrAllocateSendBuffers(bufferId5, targetNum, targetSizeVec, bufferManager);
423427
EXPECT_EQ(sendBuffers3.size(), targetNum);
424428
EXPECT_EQ(bufferCoverTargetNum3, targetNum);
425429
EXPECT_EQ(onlyUseDynamicBuffer3, false);

cpp/tests/unit_tests/executor/agentCommTest.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class AgentCommTest : public ::testing::Test
7878
auto constexpr dataType = nvinfer1::DataType::kFLOAT;
7979

8080
using BlocksPerWindow = std::map<SizeType32, std::tuple<SizeType32, SizeType32>>;
81-
const BlocksPerWindow blocksPerWindow
81+
BlocksPerWindow const blocksPerWindow
8282
= {{maxAttentionWindow, std::make_tuple(totalNumBlocks, blocksInSecondaryPool)}};
8383

8484
mCacheManager = std::make_unique<KVCacheManager>(numLayers, numHeads, sizePerHead, tokensPerBlock,
@@ -90,7 +90,8 @@ class AgentCommTest : public ::testing::Test
9090

9191
size_t maxNumTokens = 1024;
9292
mTransBufferManager = std::make_unique<CacheTransBufferManager>(mCacheManager.get(), maxNumTokens);
93-
mCacheState = std::make_unique<CacheState>(numLayers, numHeads, sizePerHead, tokensPerBlock, 1, 1, 1, dataType);
93+
mCacheState = std::make_unique<CacheState>(
94+
numLayers, numHeads, sizePerHead, tokensPerBlock, 1, 1, 1, std::vector<SizeType32>{numLayers}, dataType);
9495
}
9596

9697
void TearDown() override

cpp/tests/unit_tests/executor/serializeUtilsTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -726,7 +726,7 @@ TEST(SerializeUtilsTest, ContextPhaseParams)
726726
{
727727
auto state = std::make_unique<texec::DataTransceiverState>();
728728
state->setCommState(texec::kv_cache::CommState{12, "127.0.0.1"});
729-
state->setCacheState(texec::kv_cache::CacheState{10, 12, 128, 128, 8, 8, 8, nvinfer1::DataType::kFLOAT});
729+
state->setCacheState(texec::kv_cache::CacheState{10, 12, 128, 128, 8, 8, 8, {4}, nvinfer1::DataType::kFLOAT});
730730
auto stats = texec::ContextPhaseParams({10, 20, 30, 40, 50, 60}, 0, state.release(), VecTokens{10, 20});
731731
auto stats2 = serializeDeserialize(stats);
732732
EXPECT_EQ(stats, stats2);
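
(File path not captured in this view; presumably the new config referenced by the test map as disagg_config_ctxtp1_gentp1_deepseek_v3_lite_one_mtp_ctxpp2_gentp2.yaml)

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change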
@@ -0,0 +1,32 @@
1+
hostname: localhost
2+
port: 8000
3+
model: DeepSeek-V3-Lite/fp8
4+
free_gpu_memory_fraction: 0.1
5+
backend: "pytorch"
6+
cuda_graph_config: null
7+
disable_overlap_scheduler: True
8+
9+
context_servers:
10+
num_instances: 1
11+
tensor_parallel_size: 1
12+
pipeline_parallel_size: 2
13+
enable_attention_dp: false
14+
speculative_config:
15+
decoding_type: MTP
16+
num_nextn_predict_layers: 1
17+
cache_transceiver_config:
18+
backend: DEFAULT
19+
urls:
20+
- "localhost:8001"
21+
generation_servers:
22+
num_instances: 1
23+
tensor_parallel_size: 2
24+
pipeline_parallel_size: 1
25+
enable_attention_dp: false
26+
speculative_config:
27+
decoding_type: MTP
28+
num_nextn_predict_layers: 1
29+
cache_transceiver_config:
30+
backend: DEFAULT
31+
urls:
32+
- "localhost:8002"

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,10 @@ def get_test_config(test_desc, example_dir, test_root):
140140
(2,
141141
f"{test_configs_root}/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml"
142142
),
143+
"deepseek_v3_lite_fp8_ctxpp2_gentp2_one_mtp":
144+
(4,
145+
f"{test_configs_root}/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_one_mtp_ctxpp2_gentp2.yaml"
146+
),
143147
}
144148

145149
if test_desc not in config_map:
@@ -797,6 +801,27 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp(
797801
cwd=llm_venv.get_working_directory())
798802

799803

804+
@skip_no_hopper
805+
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
806+
indirect=True)
807+
def test_disaggregated_deepseek_v3_lite_fp8_ctxpp2_gentp2_one_mtp(
808+
disaggregated_test_root, disaggregated_example_root, llm_venv,
809+
deepseek_v3_model_root):
810+
src_dst_dict = {
811+
deepseek_v3_model_root:
812+
f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8",
813+
}
814+
for src, dst in src_dst_dict.items():
815+
if not os.path.islink(dst):
816+
os.makedirs(os.path.dirname(dst), exist_ok=True)
817+
os.symlink(src, dst, target_is_directory=True)
818+
819+
run_disaggregated_test(disaggregated_example_root,
820+
"deepseek_v3_lite_fp8_ctxpp2_gentp2_one_mtp",
821+
env=llm_venv._new_env,
822+
cwd=llm_venv.get_working_directory())
823+
824+
800825
@skip_no_hopper
801826
@skip_arm
802827
@pytest.mark.skip_less_device(4)

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ l0_dgx_h100:
134134
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_overlap_cuda_graph[DeepSeek-V3-Lite-fp8]
135135
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_cache_aware_balance[DeepSeek-V3-Lite-bf16]
136136
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_conditional[DeepSeek-V3-Lite-bf16]
137+
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ctxpp2_gentp2_one_mtp[DeepSeek-V3-Lite-fp8]
137138
- disaggregated/test_workers.py::test_workers_conditional_disaggregation_deepseek_v3_lite_bf16[DeepSeek-V3-Lite-bf16]
138139
- disaggregated/test_workers.py::test_workers_kv_cache_aware_router_deepseek_v3_lite_bf16[DeepSeek-V3-Lite-bf16]
139140
- condition:

0 commit comments

Comments
 (0)