From 405e74166a5df86c1235e04d4f495c83ec08cab9 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Fri, 13 Mar 2026 00:25:33 +0000 Subject: [PATCH] refactor(linter/plugins): add shared binary search function for tokens methods (#20310) All tokens methods contain binary searches through the `tokens` array. The code is repeated inline in every method to avoid the cost of function calls, but the downside is that it makes the code verbose and hard to understand. Break out the binary search logic into a function which is used in every method. This is a perf regression due to the extra cost of function calls, but that is fixed in #20312 by a TSDown plugin which inlines `firstTokenAtOrAfter` into all call sites, producing code in the final build that is almost identical to before this PR. This change makes the code shorter and easier to understand, but the main motivation is to enable switching in a future PR to performing the binary searches reading directly from the buffer, without touching token objects. This means that tokens can be deserialized lazily, instead of having to deserialize the entire tokens array eagerly as we do now. This will be a large perf gain, but would be impractically complex if the convoluted buffer-search logic had to be repeated in 20+ places. 
--- apps/oxlint/src-js/plugins/tokens_methods.ts | 286 ++++--------------- 1 file changed, 57 insertions(+), 229 deletions(-) diff --git a/apps/oxlint/src-js/plugins/tokens_methods.ts b/apps/oxlint/src-js/plugins/tokens_methods.ts index bc754496b0926..75ae5606cd9d2 100644 --- a/apps/oxlint/src-js/plugins/tokens_methods.ts +++ b/apps/oxlint/src-js/plugins/tokens_methods.ts @@ -133,30 +133,12 @@ export function getTokens> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + let sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Binary search for first token past `node`'s end - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + let sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); sliceStart = Math.max(0, sliceStart - beforeCount); - sliceEnd = Math.min(sliceEnd + afterCount, tokensLength); + sliceEnd = Math.min(sliceEnd + afterCount, tokenList.length); if (typeof filter !== "function") { return tokenList.slice( @@ -243,17 +225,9 @@ export function getFirstToken> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - startIndex = mid; - } - } + const startIndex = firstTokenAtOrAfter(tokenList, rangeStart, 0); + const tokensLength = tokenList.length; if (typeof filter !== "function") { const skipTo = startIndex + (skip ?? 
0); // Avoid indexing out of bounds @@ -337,27 +311,9 @@ export function getFirstTokens> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + const sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Binary search for first token past `node`'s end - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + const sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); if (typeof filter !== "function") { if (typeof count !== "number") return tokenList.slice(sliceStart, sliceEnd) as Result; @@ -435,18 +391,7 @@ export function getLastToken> 1; - if (tokenList[mid].start < rangeEnd) { - lastTokenIndex = mid + 1; - } else { - hi = mid; - } - } - - lastTokenIndex--; + const lastTokenIndex = firstTokenAtOrAfter(tokenList, rangeEnd, 0) - 1; if (typeof filter !== "function") { const skipTo = lastTokenIndex - (skip ?? 
0); @@ -534,27 +479,9 @@ export function getLastTokens> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + const sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Binary search for first token past `node`'s end - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + const sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); if (typeof filter !== "function") { if (typeof count !== "number") return tokenList.slice(sliceStart, sliceEnd) as Result; @@ -630,20 +557,8 @@ export function getTokenBefore> 1; - if (tokenList[mid].start < nodeStart) { - beforeIndex = mid + 1; - } else { - hi = mid; - } - } - - beforeIndex--; + // Binary search for token immediately before the given node, token, or comment + let beforeIndex = firstTokenAtOrAfter(tokenList, nodeStart, 0) - 1; if (typeof filter !== "function") { const skipTo = beforeIndex - (skip ?? 0); @@ -744,16 +659,8 @@ export function getTokensBefore< const targetStart = nodeOrToken.range[0]; - let sliceEnd = 0; - let hi = tokenList.length; - while (sliceEnd < hi) { - const mid = (sliceEnd + hi) >> 1; - if (tokenList[mid].start < targetStart) { - sliceEnd = mid + 1; - } else { - hi = mid; - } - } + // Binary search for first token past `nodeOrToken`'s start + const sliceEnd = firstTokenAtOrAfter(tokenList, targetStart, 0); // Fast path for the common case if (typeof filter !== "function") { @@ -830,18 +737,10 @@ export function getTokenAfter> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - startIndex = mid; - } - } + const startIndex = firstTokenAtOrAfter(tokenList, rangeEnd, 0); // Fast path for the common case + const tokensLength = tokenList.length; if (typeof filter !== "function") { const skipTo = startIndex + (skip ?? 
0); // Avoid indexing out of bounds @@ -936,15 +835,8 @@ export function getTokensAfter> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } + // Binary search for first token past `nodeOrToken`'s end + const sliceStart = firstTokenAtOrAfter(tokenList, rangeEnd, 0); // Fast path for the common case if (typeof filter !== "function") { @@ -1022,30 +914,12 @@ export function getTokensBetween< // The first argument must be the left node. // Same as ESLint's implementation. const rangeStart = left.range[1], - rangeEnd = right.range[0], - tokensLength = tokenList.length; + rangeEnd = right.range[0]; // Binary search for first token past "between" range start - let sliceStart = tokensLength; - for (let lo = 0; lo < sliceStart; ) { - const mid = (lo + sliceStart) >> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + let sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Binary search for first token past "between" range end - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + let sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); // Apply padding sliceStart = Math.max(0, sliceStart - padding); @@ -1129,15 +1003,7 @@ export function getFirstTokenBetween< const tokensLength = tokenList.length; // Binary search for token immediately following `left` - let firstTokenIndex = tokensLength; - for (let lo = 0; lo < firstTokenIndex; ) { - const mid = (lo + firstTokenIndex) >> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - firstTokenIndex = mid; - } - } + const firstTokenIndex = firstTokenAtOrAfter(tokenList, rangeStart, 0); if (typeof filter !== "function") { const skipTo = firstTokenIndex + (skip ?? 
0); @@ -1222,29 +1088,10 @@ export function getFirstTokensBetween< const rangeStart = left.range[1], rangeEnd = right.range[0]; - const tokensLength = tokenList.length; - // Find the first token after `left` - let sliceStart = tokensLength; - for (let lo = 0; lo < sliceStart; ) { - const mid = (lo + sliceStart) >> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + const sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Find the first token at or after `right` - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + const sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); if (typeof filter !== "function") { if (typeof count !== "number") return tokenList.slice(sliceStart, sliceEnd) as Result; @@ -1321,18 +1168,9 @@ export function getLastTokenBetween< const rangeStart = left.range[1], rangeEnd = right.range[0]; - // Binary search for token immediately preceding `right` + // Binary search for token immediately preceding `right`. // The found token may be within the left node if there are no tokens between the nodes. - let lastTokenIndex = -1; - for (let lo = 0, hi = tokenList.length - 1; lo <= hi; ) { - const mid = (lo + hi) >> 1; - if (tokenList[mid].start < rangeEnd) { - lastTokenIndex = mid; - lo = mid + 1; - } else { - hi = mid - 1; - } - } + const lastTokenIndex = firstTokenAtOrAfter(tokenList, rangeEnd, 0) - 1; // Fast path for the common case if (typeof filter !== "function") { @@ -1416,30 +1254,12 @@ export function getLastTokensBetween< // The first argument must be the left node. // Same as ESLint's implementation. 
const rangeStart = left.range[1], - rangeEnd = right.range[0], - tokensLength = tokenList.length; + rangeEnd = right.range[0]; // Binary search for first token past "between" range start - let sliceStart = tokensLength; - for (let lo = 0; lo < sliceStart; ) { - const mid = (lo + sliceStart) >> 1; - if (tokenList[mid].start < rangeStart) { - lo = mid + 1; - } else { - sliceStart = mid; - } - } - + const sliceStart = firstTokenAtOrAfter(tokenList, rangeStart, 0); // Binary search for first token past "between" range end - let sliceEnd = tokensLength; - for (let lo = sliceStart; lo < sliceEnd; ) { - const mid = (lo + sliceEnd) >> 1; - if (tokenList[mid].start < rangeEnd) { - lo = mid + 1; - } else { - sliceEnd = mid; - } - } + const sliceEnd = firstTokenAtOrAfter(tokenList, rangeEnd, sliceStart); // Fast path for the common case if (typeof filter !== "function") { @@ -1566,17 +1386,9 @@ export function isSpaceBetween(first: NodeOrToken, second: NodeOrToken): boolean // Binary search for the first token past `rangeStart`. // Unless `first` and `second` are adjacent or overlapping, // the token will be the first token between the two nodes. - const tokensAndCommentsLength = tokensAndComments.length; - let tokenBetweenIndex = tokensAndCommentsLength; - for (let lo = 0; lo < tokenBetweenIndex; ) { - const mid = (lo + tokenBetweenIndex) >> 1; - if (tokensAndComments[mid].start < rangeStart) { - lo = mid + 1; - } else { - tokenBetweenIndex = mid; - } - } + let tokenBetweenIndex = firstTokenAtOrAfter(tokensAndComments, rangeStart, 0); + const tokensAndCommentsLength = tokensAndComments.length; for ( let lastTokenEnd = rangeStart; tokenBetweenIndex < tokensAndCommentsLength; @@ -1639,17 +1451,9 @@ export function isSpaceBetweenTokens(first: NodeOrToken, second: NodeOrToken): b // Binary search for the first token past `rangeStart`. // Unless `first` and `second` are adjacent or overlapping, // the token will be the first token between the two nodes. 
- const tokensAndCommentsLength = tokensAndComments.length; - let tokenBetweenIndex = tokensAndCommentsLength; - for (let lo = 0; lo < tokenBetweenIndex; ) { - const mid = (lo + tokenBetweenIndex) >> 1; - if (tokensAndComments[mid].start < rangeStart) { - lo = mid + 1; - } else { - tokenBetweenIndex = mid; - } - } + let tokenBetweenIndex = firstTokenAtOrAfter(tokensAndComments, rangeStart, 0); + const tokensAndCommentsLength = tokensAndComments.length; for ( let lastTokenEnd = rangeStart; tokenBetweenIndex < tokensAndCommentsLength; @@ -1671,3 +1475,27 @@ export function isSpaceBetweenTokens(first: NodeOrToken, second: NodeOrToken): b return false; } + +/** + * Find the index of the first token in `tokens` whose `start` is >= `offset`, via binary search. + * + * Searched range starts at `startIndex` (inclusive) and ends at `tokens.length` (exclusive); tokens before `startIndex` are never examined. + * + * Returns `tokens.length` if all tokens in the searched range have `start` < `offset`. + * + * @param tokens - Array of tokens/comments, which must be sorted by `start` for the binary search to be valid + * @param offset - Source offset to search for + * @param startIndex - Index to start the search from; callers pass the result of a previous search to narrow a second one + * @returns Index of first token at or after `startIndex` with `start >= offset`, or `tokens.length` if there is none + */ +function firstTokenAtOrAfter(tokens: TokenOrComment[], offset: number, startIndex: number): number { + for (let endIndex = tokens.length; startIndex < endIndex; ) { + const mid = (startIndex + endIndex) >> 1; + if (tokens[mid].start < offset) { + startIndex = mid + 1; + } else { + endIndex = mid; + } + } + return startIndex; +}