@@ -140,8 +140,7 @@ class GenericLlmRequest
140140 std::optional<SizeType32> languageAdapterUid = std::nullopt ,
141141 std::optional<MillisecondsType> allottedTimeMs = std::nullopt ,
142142 std::optional<executor::ContextPhaseParams> const & contextPhaseParams = std::nullopt ,
143- std::optional<CacheSaltIDType> cacheSaltID = std::nullopt , std::optional<TimePoint> arrivalTime = std::nullopt ,
144- std::optional<Duration> globalSteadyClockOffset = std::nullopt )
143+ std::optional<CacheSaltIDType> cacheSaltID = std::nullopt , std::optional<TimePoint> arrivalTime = std::nullopt )
145144 : mRequestId (requestId)
146145 , mPromptLen (inputTokens->size ())
147146 , mMaxNewTokens(maxNewTokens)
@@ -199,7 +198,6 @@ class GenericLlmRequest
199198 , mLanguageAdapterUid (languageAdapterUid)
200199 , mAllottedTimeMs (allottedTimeMs)
201200 , mCacheSaltID (cacheSaltID)
202- , mGlobalSteadyClockOffset (globalSteadyClockOffset)
203201 {
204202 if (mEncoderTokens .has_value () || encoderInputFeatures.has_value ())
205203 {
@@ -227,8 +225,7 @@ class GenericLlmRequest
227225 executor::PriorityType priority = executor::Request::kDefaultPriority , SizeType32 numReturnSequences = 1 ,
228226 std::optional<SizeType32> languageAdapterUid = std::nullopt ,
229227 std::optional<executor::ContextPhaseParams> const & contextPhaseParams = std::nullopt ,
230- std::optional<CacheSaltIDType> cacheSaltID = std::nullopt ,
231- std::optional<Duration> globalSteadyClockOffset = std::nullopt )
228+ std::optional<CacheSaltIDType> cacheSaltID = std::nullopt )
232229 : mRequestId (requestId)
233230 , mPromptLen (inputTokens.size())
234231 , mMaxNewTokens (maxNewTokens)
@@ -269,7 +266,6 @@ class GenericLlmRequest
269266 , mNumReturnSequences (numReturnSequences)
270267 , mLanguageAdapterUid (languageAdapterUid)
271268 , mCacheSaltID (cacheSaltID)
272- , mGlobalSteadyClockOffset (globalSteadyClockOffset)
273269 {
274270 if (mEncoderTokens .has_value ())
275271 {
@@ -1897,6 +1893,9 @@ class GenericLlmRequest
18971893 // current position of the prompt tuning table (only used in chunked prefill mode)
18981894 SizeType32 mPtableCurrentPosition {0 };
18991895
1896+ // The offset between local steady clock and global steady clock (at rank 0)
1897+ inline static std::optional<Duration> mGlobalSteadyClockOffset {std::nullopt };
1898+
19001899protected:
19011900 bool mIsStreaming ;
19021901
@@ -2059,9 +2058,6 @@ class GenericLlmRequest
20592058 // Cache salt id for each request.
20602059 std::optional<CacheSaltIDType> mCacheSaltID {std::nullopt };
20612060
2062- // The offset between local steady clock and global steady clock (at rank 0)
2063- std::optional<Duration> mGlobalSteadyClockOffset ;
2064-
20652061private:
20662062 void initialize (
20672063 VecTokens const & inputTokens, bool outputLogProbs, std::optional<TimePoint> arrivalTime = std::nullopt )
@@ -2158,6 +2154,7 @@ class GenericLlmRequest
21582154
21592155 if (mReturnPerfMetrics )
21602156 {
2157+ // arrivalTime is assumed to have been recorded on rank 0, so it does not need to be converted to the global clock
21612158 mPerfMetrics .timingMetrics .arrivalTime = arrivalTime.value_or (getSteadyClockNow ());
21622159 }
21632160 mStartTime = getSteadyClockNow ();
@@ -2265,8 +2262,7 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
22652262 std::optional<SizeType32> languageAdapterUid = std::nullopt ,
22662263 std::optional<MillisecondsType> allottedTimeMs = std::nullopt ,
22672264 std::optional<executor::ContextPhaseParams> const & contextPhaseParams = std::nullopt ,
2268- std::optional<CacheSaltIDType> cacheSaltID = std::nullopt , std::optional<TimePoint> arrivalTime = std::nullopt ,
2269- std::optional<Duration> globalSteadyClockOffset = std::nullopt )
2265+ std::optional<CacheSaltIDType> cacheSaltID = std::nullopt , std::optional<TimePoint> arrivalTime = std::nullopt )
22702266 : Base(requestId, maxNewTokens, std::make_shared<std::vector<TokenIdType>>(std::move(inputTokens)),
22712267 samplingConfig, isStreaming, endId, padId, std::move(embeddingBias), std::move(badWordsList),
22722268 std::move (stopWordsList),
@@ -2297,7 +2293,7 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
22972293 : std::optional<std::shared_ptr<VecTokenExtraIds>>(std::nullopt ),
22982294 numReturnSequences, std::move(eagleConfig), skipCrossAttnBlocks, returnPerfMetrics,
22992295 std::move(guidedDecodingParams), languageAdapterUid, allottedTimeMs, contextPhaseParams, cacheSaltID,
2300- arrivalTime, globalSteadyClockOffset )
2296+ arrivalTime)
23012297 {
23022298 }
23032299
0 commit comments