@@ -2450,12 +2450,40 @@ BlocksPerWindow BaseKVCacheManager::calculateMaxNumBlocks(executor::KvCacheConfi
24502450    };
24512451
24522452    std::map<SizeType32, float > windowSizeToShare;
2453-     //  NOTE: Righteously, blocks allocated should be proportional with
2454-     //  regard to window size. Currently, we are first allocating identical
2455-     //  number of blocks for all layers to achieve identical performance.
2456-     for  (auto  const & [windowSize, _] : windowSizeToLayers)
2453+     if  (auto  envStr = std::getenv (" TRTLLM_WINDOW_SIZE_SHARES" 
24572454    {
2458-         windowSizeToShare[windowSize] = 1 .0f  / windowSizeToLayers.size ();
2455+         float  const  fraction = windowSizeSum / windowSizesTotalSum;
2456+         TLLM_CHECK (0 .0f  < fraction && fraction <= 1 .0f );
2457+         windowSizeToShare[windowSize] = fraction;
2458+         std::stringstream ss (envStr);
2459+         std::vector<float > shares;
2460+         float  share;
2461+         while  (ss >> share)
2462+         {
2463+             shares.push_back (share);
2464+             if  (ss.peek () == ' ,' 
2465+                 ss.ignore ();
2466+         }
2467+ 
2468+         TLLM_CHECK_WITH_INFO (shares.size () == windowSizeToLayers.size (),
2469+             " Number of shares in TRTLLM_WINDOW_SIZE_SHARES (%ld) must match number of window sizes (%ld)" 
2470+             shares.size (), windowSizeToLayers.size ());
2471+ 
2472+         size_t  i = 0 ;
2473+         for  (auto  const & [windowSize, _] : windowSizeToLayers)
2474+         {
2475+             windowSizeToShare[windowSize] = shares[i++];
2476+         }
2477+     }
2478+     else 
2479+     {
2480+         //  NOTE: Righteously, blocks allocated should be proportional with
2481+         //  regard to window size. Currently, we are first allocating identical
2482+         //  number of blocks for all layers to achieve identical performance.
2483+         for  (auto  const & [windowSize, _] : windowSizeToLayers)
2484+         {
2485+             windowSizeToShare[windowSize] = 1 .0f  / windowSizeToLayers.size ();
2486+         }
24592487    }
24602488
24612489    std::vector<SizeType32> blocksPrimary;
0 commit comments