@@ -478,6 +478,17 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
 
     void TearDown() override {}
 
+    int getEnvMpiDebugRank()
+    {
+        // Look up the env variable TLLM_DEBUG_RANK.
+        char const* const env = std::getenv("TLLM_DEBUG_RANK");
+        if (env == nullptr)
+        {
+            return -2; // -1 means all ranks, -2 means no debug rank.
+        }
+        return std::stoi(env);
+    }
+
     void setUpCommunicator(int contextTp, int contextPp, int contextCp, int genTp, int genPp, int genCp,
         bool isMLA = false, bool contextDP = false, bool generationDP = false)
     {
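
Note (not part of the diff): the getEnvMpiDebugRank() helper above drives the per-rank logging guards added in the hunks below. A minimal usage sketch, assuming only the MpiComm and TLLM_LOG_INFO APIs already used in this test; setting TLLM_DEBUG_RANK to a rank id (or -1 for all ranks) before running enables the output, while leaving it unset keeps the logs off:

    // Sketch of the gating pattern: read TLLM_DEBUG_RANK once per process,
    // then log only on the selected rank (-1 = all ranks, -2 = unset/disabled).
    static const int TARGET_RANK = getEnvMpiDebugRank();
    if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
    {
        TLLM_LOG_INFO("debug output from rank %d", tensorrt_llm::mpi::MpiComm::world().getRank());
    }
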
@@ -942,12 +953,13 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
         int maxBlockInWindow = windowSize / mCacheState->getModelConfig().mTokensPerBlock;
         int startBlockId = std::max(0, static_cast<int>(blockRangeForWindow.size()) - (maxBlockInWindow + 1));
         int blockIdInWindow = 0;
-        std::vector<int> globalBlockIdsForWindow(blockRangeForWindow.size());
-        std::iota(globalBlockIdsForWindow.begin(), globalBlockIdsForWindow.end(), 0);
+        // This is relevant only when context parallelism is enabled.
+        std::vector<int> globalBlockIdsForWindow;
         if (request->mCPMetaData.has_value())
         {
             // Currently, limit support of CPMetadata to a single window size in our testcases.
             TLLM_CHECK(windowSizes.size() == 1);
+            globalBlockIdsForWindow = std::vector<int>(blockRangeForWindow.size());
             auto const& cpData = request->mCPMetaData.value();
             initial = cpData.mTotalSeqLenAcrossCPRanks;
             globalBlockIdsForWindow = cpData.mGlobalBlockIds;
@@ -956,7 +968,7 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
         {
             if (blockIdInWindow >= startBlockId)
             {
-                verifyBlockData(*it, initial, globalBlockIdsForWindow[blockIdx], windowSize);
+                verifyBlockData(*it, initial, globalBlockIdsForWindow.empty() ? blockIdx : globalBlockIdsForWindow[blockIdx], windowSize);
             }
             blockIdx++;
             blockIdInWindow++;
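
Note (not part of the diff): with this change, a request without CP metadata leaves globalBlockIdsForWindow empty, so verification falls back to the local sequential index; with CP metadata, the CP-provided global block id is looked up instead. A small illustrative sketch of that selection, using a hypothetical helper name:

    // Hypothetical helper (for illustration only) mirroring the ternary used above.
    int resolveVerifyBlockId(std::vector<int> const& globalBlockIdsForWindow, int localBlockIdx)
    {
        return globalBlockIdsForWindow.empty() ? localBlockIdx : globalBlockIdsForWindow.at(localBlockIdx);
    }
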
@@ -966,6 +978,11 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
 
     void fillBlockData(tensorrt_llm::runtime::ITensor& blockData, int blockId, size_t initial, int windowSize = 0)
     {
+        static const int TARGET_RANK = getEnvMpiDebugRank(); // -1 means all ranks.
+        if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
+        {
+            TLLM_LOG_INFO("fillBlockData called for rank %d mRankInInstance %d blockId %d windowSize %d", mRank, mRankInInstance, blockId, windowSize);
+        }
         auto const& blockManager = mManager->getBlockManager();
         auto const onlyWindowSize = windowSize == 0 ? blockManager.getPoolWindowSize(0) : windowSize;
         auto const& bufferManager = blockManager.getBufferManager(onlyWindowSize);
@@ -1015,6 +1032,19 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
                     using ValueType = decltype(generateValue);
                     auto* dataPtr = static_cast<ValueType*>(hostTensor->data(keyIndex));
                     *dataPtr = generateValue;
+                    if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
+                    {
+                        TLLM_LOG_INFO(tensorrt_llm::mpi::MpiComm::world().getRank(),
+                            "[RANK %d] [fillBlockData::key] blockId=%d, layer=%d->%d, head=%d->%d, token=%d->%d, hidden=%d, "
+                            "keyIdx=%zu, set_value=%s, dataType=%d",
+                            tensorrt_llm::mpi::MpiComm::world().getRank(),
+                            blockId, layerId, layerId + startLayerId,
+                            headId, headId + startHeadId,
+                            tokenId, tokenId + startTokenId,
+                            hiddenId, keyIndex,
+                            std::to_string(static_cast<double>(*dataPtr)).c_str(),
+                            static_cast<int>(blockData.getDataType()));
+                    }
                 },
                 generateExpectedValue(initial, windowSize, tokenId + startTokenId, layerId + startLayerId,
                     headId + startHeadId, hiddenId, true, blockData.getDataType()));
@@ -1040,10 +1070,16 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
     }
 
     void verifyBlockData(
-        tensorrt_llm::runtime::ITensor& blockData, size_t initial, int globalBlockId, int windowSize = 0)
+        tensorrt_llm::runtime::ITensor& blockData, size_t initial, int blockId, int windowSize = 0)
     {
         auto const& blockManager = mManager->getBlockManager();
 
+        static const int TARGET_RANK = getEnvMpiDebugRank(); // -1 means all ranks.
+        if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
+        {
+            TLLM_LOG_INFO("verifyBlockData called for rank %d mRankInInstance %d blockId %d", mRank, mRankInInstance, blockId);
+        }
+
         auto const onlyWindowSize = windowSize == 0 ? blockManager.getPoolWindowSize(0) : windowSize;
         auto const& bufferManager = blockManager.getBufferManager(onlyWindowSize);
 
@@ -1071,7 +1107,7 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
         }
         int kvFactor = mCacheState->getAttentionConfig().mKvFactor;
         int tokensPerBlock = mCacheState->getModelConfig().mTokensPerBlock;
-        int startTokenId = globalBlockId * tokensPerBlock;
+        int startTokenId = blockId * tokensPerBlock;
         int sizePerHead = mCacheState->getModelConfig().mSizePerHead;
 
         bufferManager.copy(blockData, *hostTensor);
@@ -1096,6 +1132,24 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
                     using ValueType = decltype(generateValue);
                     auto* dataPtr = static_cast<ValueType*>(hostTensor->data(keyIndex));
                     EXPECT_EQ(*dataPtr, generateValue);
+                    if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
+                    {
+                        std::string result = "";
+                        if (*dataPtr != generateValue) {
+                            result = "FAILED!";
+                        }
+                        TLLM_LOG_INFO(tensorrt_llm::mpi::MpiComm::world().getRank(),
+                            "[RANK %d] [verifyBlockData::value] blockId=%d, layer=%d->%d, head=%d->%d, token=%d->%d, hidden=%d, "
+                            "valueIdx=%zu, actual_value=%s, dataType=%d %s",
+                            tensorrt_llm::mpi::MpiComm::world().getRank(),
+                            blockId, layerId, layerId + startLayerId,
+                            headId, headId + startHeadId,
+                            tokenId, tokenId + startTokenId,
+                            hiddenId, valueIndex,
+                            std::to_string(static_cast<double>(*dataPtr)).c_str(),
+                            static_cast<int>(blockData.getDataType()),
+                            result.c_str());
+                    }
                 },
                 generateExpectedValue(initial, windowSize, tokenId + startTokenId, layerId + startLayerId,
                     headId + startHeadId, hiddenId, true, blockData.getDataType()));
@@ -1121,6 +1175,12 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
     std::variant<double, float, int16_t, int8_t> generateExpectedValue(size_t initial, int windowSize, int tokenId,
         int layerId, int headId, int hiddenId, bool key, nvinfer1::DataType dataType)
     {
+        static const int TARGET_RANK = getEnvMpiDebugRank(); // -1 means all ranks.
+        if (TARGET_RANK == -1 || tensorrt_llm::mpi::MpiComm::world().getRank() == TARGET_RANK)
+        {
+            TLLM_LOG_INFO("generateExpectedValue called for rank %d, initial=%zu, windowSize=%d, tokenId=%d, layerId=%d, headId=%d, hiddenId=%d, key=%d, dataType=%d",
+                tensorrt_llm::mpi::MpiComm::world().getRank(), initial, windowSize, tokenId, layerId, headId, hiddenId, key, static_cast<int>(dataType));
+        }
         size_t seed = 0;
         std::size_t hashValue = std::hash<size_t>{}(initial);
         std::hash<int> hasher{};
@@ -1208,7 +1268,7 @@ TEST_P(AsymmetricalCacheTest, TestCase)
     {
         GTEST_SKIP() << "Temporarily skipping cache transceiver tests with NIXL backend for CP.";
     }
-    std::vector<int> lenList = {30, 10, 60, 80};
+    std::vector<int> lenList = {8};
     if (genCp > 1)
     {
         std::vector<int> updatedLenList;
@@ -1236,7 +1296,7 @@ TEST_P(AsymmetricalCacheTest, TestCase)
     std::vector<std::shared_ptr<WrappedLlmRequest>> requests;
 
     // the second loop is for cache reuse
-    for (int i = 0; i < 2; i++)
+    for (int i = 0; i < 1; i++)
     {
         for (auto len : lenList)
         {
@@ -1413,10 +1473,22 @@ TEST_P(AsymmetricalCacheTestWithDP, TestCase)
 }
 
 INSTANTIATE_TEST_CASE_P(AsymmetricCaseTest0, AsymmetricalCacheTest,
-    testing::Combine(testing::Values(1, 2), testing::Values(1, 2), testing::Values(1), testing::Values(1, 2),
-        testing::Values(1, 2), testing::Values(1), testing::Values(4), testing::Values(4), testing::Values(4),
-        testing::Values(16), testing::Values(nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kINT8), testing::Values(2),
-        testing::Values(false), testing::Values(false), testing::Values(false), testing::Values(true, false)));
+    testing::Combine(testing::Values(1),
+        testing::Values(1),
+        testing::Values(1),
+        testing::Values(1),
+        testing::Values(1),
+        testing::Values(1),
+        testing::Values(2),
+        testing::Values(1),
+        testing::Values(4),
+        testing::Values(8),
+        testing::Values(nvinfer1::DataType::kINT8),
+        testing::Values(1),
+        testing::Values(false),
+        testing::Values(false),
+        testing::Values(false),
+        testing::Values(true)));
 
 INSTANTIATE_TEST_CASE_P(AsymmetricCaseTestWithWindow, AsymmetricalCacheTest,
     testing::Combine(testing::Values(1), testing::Values(1), testing::Values(1), testing::Values(1), testing::Values(1),