Skip to content

Commit 66044e8

Browse files
committed
review comment
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent c2ec52d commit 66044e8

File tree

5 files changed, with 12 additions and 13 deletions.

cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -821,8 +821,7 @@ class WindowBlockManager
821821
return mIsSWA;
822822
}
823823

824-
[[nodiscard]] std::optional<std::shared_ptr<KVCacheBlock>> findBlocksInReuseTreeByBlockKey(
825-
BlockKey const& blockKey);
824+
[[nodiscard]] std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey(BlockKey const& blockKey);
826825

827826
//! \brief Unpin blocks by starting from a block id and walking prev pointers.
828827
void unpinBlocksById(KVCacheBlock::IdType blockId);
@@ -1194,7 +1193,7 @@ class BlockManager
11941193
return mWindowBlockManagers.at(windowSize).getBlockById(blockId);
11951194
}
11961195

1197-
[[nodiscard]] std::optional<std::shared_ptr<KVCacheBlock>> findBlocksInReuseTreeByBlockKey(
1196+
[[nodiscard]] std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey(
11981197
BlockKey const& blockKey, SizeType32 windowSize)
11991198
{
12001199
return mWindowBlockManagers.at(windowSize).findBlocksInReuseTreeByBlockKey(blockKey);
@@ -1491,7 +1490,7 @@ class BaseKVCacheManager
14911490

14921491
[[nodiscard]] virtual CacheType getCacheType() const = 0;
14931492

1494-
[[nodiscard]] virtual std::optional<std::shared_ptr<KVCacheBlock>> findBlocksInReuseTreeByBlockKey(
1493+
[[nodiscard]] virtual std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey(
14951494
BlockKey const& blockKey, SizeType32 windowSize)
14961495
= 0;
14971496

@@ -1794,7 +1793,7 @@ class KVCacheManager : public BaseKVCacheManager
17941793
mBlockManager.flushIterationEvents();
17951794
}
17961795

1797-
std::optional<std::shared_ptr<KVCacheBlock>> findBlocksInReuseTreeByBlockKey(
1796+
std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey(
17981797
BlockKey const& blockKey, SizeType32 windowSize) override
17991798
{
18001799
return mBlockManager.findBlocksInReuseTreeByBlockKey(blockKey, windowSize);

cpp/include/tensorrt_llm/batch_manager/kvCacheUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class BlockRange
4545
{
4646
auto const windowSize = firstWindowSize(cacheManager);
4747
// Find the last block in the reuse tree for the provided full sequence of block keys
48-
auto lastBlock = *cacheManager.findBlocksInReuseTreeByBlockKey(lastBlockKey, windowSize);
48+
auto lastBlock = cacheManager.findBlocksInReuseTreeByBlockKey(lastBlockKey, windowSize);
4949
// TODO: handle the case where the last block is not found
5050
TLLM_CHECK_WITH_INFO(lastBlock, "Couldn't find the requested block in the reuse tree");
5151
int32_t const numBlocksToCollect = indexFromEnd + 1;

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,8 +1096,7 @@ bool WindowBlockManager::blockInRadixTree(BlockPtr const& block)
10961096
return !block->getUniqueTokens().empty() && block->getPrevBlock() != nullptr;
10971097
}
10981098

1099-
std::optional<std::shared_ptr<KVCacheBlock>> WindowBlockManager::findBlocksInReuseTreeByBlockKey(
1100-
BlockKey const& blockKey)
1099+
std::shared_ptr<KVCacheBlock> WindowBlockManager::findBlocksInReuseTreeByBlockKey(BlockKey const& blockKey)
11011100
{
11021101
std::lock_guard<std::mutex> lock(mCachedBlocksRootMutex);
11031102
auto blockedUniqueTokens
@@ -1118,7 +1117,7 @@ std::optional<std::shared_ptr<KVCacheBlock>> WindowBlockManager::findBlocksInReu
11181117

11191118
if (matchingBlock == nullptr)
11201119
{
1121-
return std::nullopt;
1120+
return nullptr;
11221121
}
11231122

11241123
searchRoot = std::move(matchingBlock);

cpp/tests/unit_tests/batch_manager/kvCacheManagerTest.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -639,9 +639,8 @@ TEST_F(KVCacheManagerTest, FindBlocksInReuseTreeByBlockKeysTest)
639639
inputTokens->pop_back();
640640
BlockKey fullKey{*inputTokens};
641641
auto const foundFull = kvCacheManager.findBlocksInReuseTreeByBlockKey(fullKey, maxAttentionWindow);
642-
ASSERT_TRUE(foundFull.has_value());
643-
ASSERT_NE(foundFull.value(), nullptr);
644-
auto const& lastBlock = foundFull.value();
642+
ASSERT_NE(foundFull, nullptr);
643+
auto const& lastBlock = foundFull;
645644

646645
// Check the chain back to previous blocks
647646
auto const prev2 = lastBlock->getPrevBlock();

jenkins/Build.groovy

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,17 +102,19 @@ def BUILD_CONFIGS = [
102102
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
103103
(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
104104
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
105+
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
105106
],
106107
(CONFIG_LINUX_AARCH64_PYBIND): [
107108
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl",
108109
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
109110
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
111+
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
110112
],
111113
(CONFIG_LINUX_AARCH64_LLVM) : [
112114
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
113115
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
114116
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
115-
(BUILD_JOBS_FOR_CONFIG): "6", // TODO: Remove after fix the build OOM issue on SBSA
117+
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
116118
],
117119
]
118120

0 commit comments

Comments
 (0)