Skip to content

Commit b6a1607

Browse files
tshmilnvidia authored and glevnv committed
GDS_MT backend support for LoopbackAgent
Signed-off-by: Tomer Shmilovich <[email protected]>
1 parent c19c9de commit b6a1607

File tree

5 files changed

+16
-7
lines changed

5 files changed

+16
-7
lines changed

cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ class BlockManager
873873
SizeType32 sinkBubbleLength, bool onboardBlocks, CacheType cacheType = CacheType::kSELF,
874874
std::optional<executor::RetentionPriority> secondaryOffloadMinPriority = std::nullopt,
875875
std::shared_ptr<KVCacheEventManager> eventManager = nullptr, bool enablePartialReuse = true,
876-
bool copyOnPartialReuse = true);
876+
bool copyOnPartialReuse = true, bool multiThreadReuse = false);
877877

878878
BlockManager(BlockManager const&) = delete;
879879
BlockManager& operator=(BlockManager const&) = delete;

cpp/include/tensorrt_llm/executor/transferAgent.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ struct BaseAgentConfig
270270
{
271271
std::string mName;
272272
bool useProgThread;
273+
bool multiThread;
273274
};
274275

275276
class BaseTransferAgent

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,15 +505,16 @@ BlockManager::BlockManager(std::vector<SizeType32> const& numKvHeadsPerLayer, Si
505505
std::optional<TempAttentionWindowInputs> const& tempAttentionWindowInputs, nvinfer1::DataType dtype,
506506
SizeType32 sinkBubbleLength, bool onboardBlocks, CacheType cacheType,
507507
std::optional<executor::RetentionPriority> secondaryOffloadMinPriority,
508-
std::shared_ptr<KVCacheEventManager> eventManager, bool enablePartialReuse, bool copyOnPartialReuse)
508+
std::shared_ptr<KVCacheEventManager> eventManager, bool enablePartialReuse, bool copyOnPartialReuse,
509+
bool multiThreadReuse)
509510
: mNumLayers{static_cast<SizeType32>(numKvHeadsPerLayer.size())}
510511
, mTokensPerBlock{tokensPerBlock}
511512
, mEventManager{std::move(eventManager)}
512513
, mStream{stream}
513514
, mCacheType{cacheType}
514515
{
515516
mAgentName = std::string("GDSAgent");
516-
BaseAgentConfig config{mAgentName, true};
517+
BaseAgentConfig config{mAgentName, true, multiThreadReuse};
517518
mLoopbackAgent = makeLoopbackAgent("nixl", &config);
518519

519520
auto const uniqueWindowSizeToLayers

cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/transferAgent.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,10 +490,17 @@ NixlLoopbackAgent::NixlLoopbackAgent(BaseAgentConfig const& config)
490490
init["batch_limit"] = std::to_string(128);
491491
init["max_request_size"] = std::to_string(16 * 1024 * 1024);
492492

493-
status = mRawAgent->createBackend("GDS", init, backend);
494-
if (status != NIXL_SUCCESS || !backend)
493+
if (config.multiThread)
495494
{
496-
TLLM_THROW("Failed to create NIXL backend, status = %d", status);
495+
status = mRawAgent->createBackend("GDS_MT", init, backend);
496+
if (status != NIXL_SUCCESS || !backend)
497+
TLLM_THROW("Failed to create NIXL GDS_MT backend, status = %d", status);
498+
}
499+
else
500+
{
501+
status = mRawAgent->createBackend("GDS", init, backend);
502+
if (status != NIXL_SUCCESS || !backend)
503+
TLLM_THROW("Failed to create NIXL GDS backend, status = %d", status);
497504
}
498505
}
499506

docker/common/install_nixl.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ meson setup builddir \
3232
-Dcudapath_inc="$CUDA_PATH/include" \
3333
-Dgds_path="$GDS_PATH" \
3434
-Dinstall_headers=true \
35-
-Dstatic_plugins=UCX,GDS
35+
-Dstatic_plugins=UCX,GDS,GDS_MT
3636

3737
cd builddir && ninja install
3838
cd ../..

0 commit comments

Comments
 (0)