diff --git a/apps/oxlint/src-js/plugins/comments_methods.ts b/apps/oxlint/src-js/plugins/comments_methods.ts index 92287a3eb6d1b..f272fd21fba2c 100644 --- a/apps/oxlint/src-js/plugins/comments_methods.ts +++ b/apps/oxlint/src-js/plugins/comments_methods.ts @@ -11,16 +11,22 @@ import { initCommentsBuffer, deserializeCommentIfNeeded, } from "./comments.ts"; -import { sourceText } from "./source_code.ts"; +import { + initTokensAndCommentsBuffer, + tokensAndCommentsUint32, + tokensAndCommentsLen, + MERGED_SIZE32, + MERGED_SIZE32_SHIFT, + MERGED_ORIGINAL_INDEX_OFFSET32, + MERGED_TYPE_OFFSET32, + MERGED_TYPE_TOKEN, +} from "./tokens_and_comments.ts"; import { firstTokenAtOrAfter } from "./tokens_methods.ts"; -import { debugAssertIsNonNull } from "../utils/asserts.ts"; +import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; import type { Comment } from "./comments.ts"; import type { Node, NodeOrToken } from "./types.ts"; -// Regex that tests if a string is entirely whitespace. -const WHITESPACE_ONLY_REGEXP = /^\s*$/; - /** * Retrieve an array containing all comments in the source code. * @returns Array of `Comment`s in order they appear in source. @@ -31,6 +37,8 @@ export function getAllComments(): Comment[] { return comments; } +debugAssert(MERGED_TYPE_OFFSET32 > 0, "`getCommentsBefore` relies on this"); + /** * Get all comments directly before the given node or token. * @@ -49,36 +57,45 @@ export function getAllComments(): Comment[] { * @returns Array of `Comment`s in occurrence order. */ export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] { - if (commentsUint32 === null) initCommentsBuffer(); - debugAssertIsNonNull(commentsUint32); - debugAssertIsNonNull(sourceText); + if (tokensAndCommentsUint32 === null) initTokensAndCommentsBuffer(); + debugAssertIsNonNull(tokensAndCommentsUint32); // Early exit for files with no comments if (commentsLen === 0) return []; - let targetStart = nodeOrToken.range[0]; // start - - // Binary search for first comment at or past `nodeOrToken`'s start. - // Comments before this index are candidates to be included in returned array. - const sliceEnd = firstTokenAtOrAfter(commentsUint32, targetStart, 0, commentsLen); - - let sliceStart = commentsLen; - for (let i = sliceEnd - 1; i >= 0; i--) { - // Read `end` from buffer: u32 at offset 1 of each 4 x u32 entry - const commentEnd = commentsUint32[(i << 2) + 1]; - const gap = sourceText.slice(commentEnd, targetStart); - // Ensure that there is nothing except whitespace between the end of the - // current comment and the start of the next one as we iterate backwards - if (WHITESPACE_ONLY_REGEXP.test(gap)) { - sliceStart = i; - // Read `start` from buffer - targetStart = commentsUint32[i << 2]; - } else { - break; - } + const targetStart = nodeOrToken.range[0]; + + // Binary search merged buffer for first entry at or after target's start + const searchIndex = firstTokenAtOrAfter( + tokensAndCommentsUint32, + targetStart, + 0, + tokensAndCommentsLen, + ); + + // Walk backwards over consecutive comments. + // Operate in pos32 space: `typePos32` points directly at the type field, decrementing by 4 each step, + // instead of recomputing `(i << 2) + 2` per iteration. + const startTypePos32 = + (searchIndex << MERGED_SIZE32_SHIFT) - (MERGED_SIZE32 - MERGED_TYPE_OFFSET32); + let typePos32 = startTypePos32; + // `MERGED_TYPE_OFFSET32` is greater than 0 (checked by debug assert above), so `typePos32 > 0` is right check. + // If `MERGED_TYPE_OFFSET32` was zero, it'd be `typePos32 >= 0`. + while (typePos32 > 0 && tokensAndCommentsUint32[typePos32] !== MERGED_TYPE_TOKEN) { + typePos32 -= MERGED_SIZE32; } - // Deserialize only the comments we're returning + const count32 = startTypePos32 - typePos32; + if (count32 === 0) return []; + + // Read `originalIndex` of earliest comment, calculate slice end from how far we walked. + // `typePos32` is at the entry before the first comment. + const sliceStart = + tokensAndCommentsUint32[ + typePos32 + (MERGED_SIZE32 - MERGED_TYPE_OFFSET32 + MERGED_ORIGINAL_INDEX_OFFSET32) + ]; + const sliceEnd = sliceStart + (count32 >> MERGED_SIZE32_SHIFT); + for (let i = sliceStart; i < sliceEnd; i++) { deserializeCommentIfNeeded(i); } @@ -104,35 +121,43 @@ export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] { * @returns Array of `Comment`s in occurrence order. */ export function getCommentsAfter(nodeOrToken: NodeOrToken): Comment[] { - if (commentsUint32 === null) initCommentsBuffer(); - debugAssertIsNonNull(commentsUint32); - debugAssertIsNonNull(sourceText); + if (tokensAndCommentsUint32 === null) initTokensAndCommentsBuffer(); + debugAssertIsNonNull(tokensAndCommentsUint32); // Early exit for files with no comments if (commentsLen === 0) return []; - let targetEnd = nodeOrToken.range[1]; // end - - // Binary search for first comment at or past `nodeOrToken`'s end. - // Comments from this index onwards are candidates to be included in returned array. - const sliceStart = firstTokenAtOrAfter(commentsUint32, targetEnd, 0, commentsLen); - - let sliceEnd = 0; - for (let i = sliceStart; i < commentsLen; i++) { - // Ensure that there is nothing except whitespace between the - // end of the previous comment and the start of the current one - const commentStart = commentsUint32[i << 2]; - const gap = sourceText.slice(targetEnd, commentStart); - if (WHITESPACE_ONLY_REGEXP.test(gap)) { - sliceEnd = i + 1; - // Read `end` from buffer - targetEnd = commentsUint32[(i << 2) + 1]; - } else { - break; - } + const targetEnd = nodeOrToken.range[1]; + + // Binary search merged buffer for first entry at or after target's end. + const searchIndex = firstTokenAtOrAfter( + tokensAndCommentsUint32, + targetEnd, + 0, + tokensAndCommentsLen, + ); + + // Walk forwards over consecutive comments. + // Operate in pos32 space: `typePos32` points directly at the type field, incrementing by 4 each step, + // instead of recomputing `(i << 2) + 2` per iteration. + // No explicit bounds check is needed - a sentinel `MERGED_TYPE_TOKEN` entry is written after the last + // valid entry in `initTokensAndCommentsBuffer`, so the loop terminates naturally. + const startTypePos32 = (searchIndex << MERGED_SIZE32_SHIFT) + MERGED_TYPE_OFFSET32; + let typePos32 = startTypePos32; + while (tokensAndCommentsUint32[typePos32] !== MERGED_TYPE_TOKEN) { + typePos32 += MERGED_SIZE32; } - // Deserialize only the comments we're returning + const count32 = typePos32 - startTypePos32; + if (count32 === 0) return []; + + // Read `originalIndex` of earliest comment, calculate slice end from how far we walked + const sliceStart = + tokensAndCommentsUint32[ + startTypePos32 - (MERGED_TYPE_OFFSET32 - MERGED_ORIGINAL_INDEX_OFFSET32) + ]; + const sliceEnd = sliceStart + (count32 >> MERGED_SIZE32_SHIFT); + for (let i = sliceStart; i < sliceEnd; i++) { deserializeCommentIfNeeded(i); } diff --git a/apps/oxlint/src-js/plugins/tokens_and_comments.ts b/apps/oxlint/src-js/plugins/tokens_and_comments.ts index 39c9fbfeff80d..483d984c0db6b 100644 --- a/apps/oxlint/src-js/plugins/tokens_and_comments.ts +++ b/apps/oxlint/src-js/plugins/tokens_and_comments.ts @@ -58,18 +58,18 @@ export type TokenOrComment = Token | Comment; // // These constants define the shape of the data stored in `tokensAndCommentsUint32` as per the above. const MERGED_SIZE = 16; -const MERGED_SIZE32_SHIFT = 2; // 4 x u32s per entry (16 bytes) -const MERGED_SIZE32 = 1 << MERGED_SIZE32_SHIFT; // 4 x u32s per entry +export const MERGED_SIZE32_SHIFT = 2; // 4 x u32s per entry (16 bytes) +export const MERGED_SIZE32 = 1 << MERGED_SIZE32_SHIFT; // 4 x u32s per entry debugAssert(MERGED_SIZE === MERGED_SIZE32 * 4); debugAssert(MERGED_SIZE === COMMENT_SIZE, "Size of token, comment, and merged entry must be equal"); -const MERGED_ORIGINAL_INDEX_OFFSET32 = 1; // u32 index of the `original_index` field within an entry -const MERGED_TYPE_OFFSET32 = 2; // u32 index of the `is_comment` field within an entry +export const MERGED_ORIGINAL_INDEX_OFFSET32 = 1; // u32 index of the `original_index` field within an entry +export const MERGED_TYPE_OFFSET32 = 2; // u32 index of the `is_comment` field within an entry // Type of merged entry. // "Poor man's enum" which optimizes better than a TS enum. type MergedType = typeof MERGED_TYPE_TOKEN | typeof MERGED_TYPE_COMMENT; -const MERGED_TYPE_TOKEN = 0; +export const MERGED_TYPE_TOKEN = 0; const MERGED_TYPE_COMMENT = 1; // Cached `tokensAndComments` array, returned by `getTokensAndComments`. @@ -125,7 +125,8 @@ export function initTokensAndCommentsBuffer(): void { // for all files, so we avoid allocating a large buffer each time. // `Uint32Array`s can't grow in place, so allocate a new one. // First allocation uses minimum capacity. Subsequent growths double, to avoid frequent reallocations. - const requiredLen32 = tokensAndCommentsLen << MERGED_SIZE32_SHIFT; + // +1 entry for sentinel (see below). + const requiredLen32 = (tokensAndCommentsLen + 1) << MERGED_SIZE32_SHIFT; const backingLen = tokensAndCommentsBackingUint32.length; if (backingLen < requiredLen32) { tokensAndCommentsBackingUint32 = new Uint32Array( @@ -144,6 +145,12 @@ export function initTokensAndCommentsBuffer(): void { mergeTokensAndComments(tokensUint32, commentsUint32); } + // Write a sentinel `MERGED_TYPE_TOKEN` entry immediately after the last valid entry. + // This allows `getCommentsAfter`'s forward walk to terminate without an explicit bounds check + // against `tokensAndCommentsLen` on every iteration - the sentinel acts as a natural stop. + tokensAndCommentsUint32[(tokensAndCommentsLen << MERGED_SIZE32_SHIFT) + MERGED_TYPE_OFFSET32] = + MERGED_TYPE_TOKEN; + debugCheckMergedOrder(); }