diff --git a/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp b/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp index 4a5ddb89286..9bc96eb5383 100644 --- a/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp +++ b/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp @@ -272,8 +272,8 @@ TrtGptModelInflightBatching::TrtGptModelInflightBatching(std::shared_ptr(numKvHeadsPerLayerBegin, numKvHeadsPerLayerEnd); - auto windowSizeLayers - = BaseKVCacheManager::groupLayersByWindowSize(maxAttentionWindowVec, modelConfig.getNbLayers()); + auto const numLayers = static_cast(numKvHeadsPerLayer.size()); + auto const windowSizeLayers = KVCacheManager::groupLayersByWindowSize(maxAttentionWindowVec, numLayers); std::map cacheSizeBytesPerTokenPerWindow; for (auto const& [windowSize, managedLayers] : windowSizeLayers) {