Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 77 additions & 52 deletions apps/oxlint/src-js/plugins/comments_methods.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,22 @@ import {
initCommentsBuffer,
deserializeCommentIfNeeded,
} from "./comments.ts";
import { sourceText } from "./source_code.ts";
import {
initTokensAndCommentsBuffer,
tokensAndCommentsUint32,
tokensAndCommentsLen,
MERGED_SIZE32,
MERGED_SIZE32_SHIFT,
MERGED_ORIGINAL_INDEX_OFFSET32,
MERGED_TYPE_OFFSET32,
MERGED_TYPE_TOKEN,
} from "./tokens_and_comments.ts";
import { firstTokenAtOrAfter } from "./tokens_methods.ts";
import { debugAssertIsNonNull } from "../utils/asserts.ts";
import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts";

import type { Comment } from "./comments.ts";
import type { Node, NodeOrToken } from "./types.ts";

// Regex that tests if a string is entirely whitespace.
const WHITESPACE_ONLY_REGEXP = /^\s*$/;

/**
* Retrieve an array containing all comments in the source code.
* @returns Array of `Comment`s in order they appear in source.
Expand All @@ -31,6 +37,8 @@ export function getAllComments(): Comment[] {
return comments;
}

debugAssert(MERGED_TYPE_OFFSET32 > 0, "`getCommentsBefore` relies on this");

/**
* Get all comments directly before the given node or token.
*
Expand All @@ -49,36 +57,45 @@ export function getAllComments(): Comment[] {
* @returns Array of `Comment`s in occurrence order.
*/
export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] {
if (commentsUint32 === null) initCommentsBuffer();
debugAssertIsNonNull(commentsUint32);
debugAssertIsNonNull(sourceText);
if (tokensAndCommentsUint32 === null) initTokensAndCommentsBuffer();
debugAssertIsNonNull(tokensAndCommentsUint32);

// Early exit for files with no comments
if (commentsLen === 0) return [];

let targetStart = nodeOrToken.range[0]; // start

// Binary search for first comment at or past `nodeOrToken`'s start.
// Comments before this index are candidates to be included in returned array.
const sliceEnd = firstTokenAtOrAfter(commentsUint32, targetStart, 0, commentsLen);

let sliceStart = commentsLen;
for (let i = sliceEnd - 1; i >= 0; i--) {
// Read `end` from buffer: u32 at offset 1 of each 4 x u32 entry
const commentEnd = commentsUint32[(i << 2) + 1];
const gap = sourceText.slice(commentEnd, targetStart);
// Ensure that there is nothing except whitespace between the end of the
// current comment and the start of the next one as we iterate backwards
if (WHITESPACE_ONLY_REGEXP.test(gap)) {
sliceStart = i;
// Read `start` from buffer
targetStart = commentsUint32[i << 2];
} else {
break;
}
const targetStart = nodeOrToken.range[0];

// Binary search merged buffer for first entry at or after target's start
const searchIndex = firstTokenAtOrAfter(
tokensAndCommentsUint32,
targetStart,
0,
tokensAndCommentsLen,
);

// Walk backwards over consecutive comments.
// Operate in pos32 space: `typePos32` points directly at the type field, decrementing by 4 each step,
// instead of recomputing `(i << 2) + 2` per iteration.
const startTypePos32 =
(searchIndex << MERGED_SIZE32_SHIFT) - (MERGED_SIZE32 - MERGED_TYPE_OFFSET32);
let typePos32 = startTypePos32;
// `MERGED_TYPE_OFFSET32` is greater than 0 (checked by debug assert above), so `typePos32 > 0` is right check.
// If `MERGED_TYPE_OFFSET32` was zero, it'd be `typePos32 >= 0`.
while (typePos32 > 0 && tokensAndCommentsUint32[typePos32] !== MERGED_TYPE_TOKEN) {
typePos32 -= MERGED_SIZE32;
}

// Deserialize only the comments we're returning
const count32 = startTypePos32 - typePos32;
if (count32 === 0) return [];

// Read `originalIndex` of earliest comment, calculate slice end from how far we walked.
// `typePos32` is at the entry before the first comment.
const sliceStart =
tokensAndCommentsUint32[
typePos32 + (MERGED_SIZE32 - MERGED_TYPE_OFFSET32 + MERGED_ORIGINAL_INDEX_OFFSET32)
];
const sliceEnd = sliceStart + (count32 >> MERGED_SIZE32_SHIFT);

for (let i = sliceStart; i < sliceEnd; i++) {
deserializeCommentIfNeeded(i);
}
Expand All @@ -104,35 +121,43 @@ export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] {
* @returns Array of `Comment`s in occurrence order.
*/
export function getCommentsAfter(nodeOrToken: NodeOrToken): Comment[] {
if (commentsUint32 === null) initCommentsBuffer();
debugAssertIsNonNull(commentsUint32);
debugAssertIsNonNull(sourceText);
if (tokensAndCommentsUint32 === null) initTokensAndCommentsBuffer();
debugAssertIsNonNull(tokensAndCommentsUint32);

// Early exit for files with no comments
if (commentsLen === 0) return [];

let targetEnd = nodeOrToken.range[1]; // end

// Binary search for first comment at or past `nodeOrToken`'s end.
// Comments from this index onwards are candidates to be included in returned array.
const sliceStart = firstTokenAtOrAfter(commentsUint32, targetEnd, 0, commentsLen);

let sliceEnd = 0;
for (let i = sliceStart; i < commentsLen; i++) {
// Ensure that there is nothing except whitespace between the
// end of the previous comment and the start of the current one
const commentStart = commentsUint32[i << 2];
const gap = sourceText.slice(targetEnd, commentStart);
if (WHITESPACE_ONLY_REGEXP.test(gap)) {
sliceEnd = i + 1;
// Read `end` from buffer
targetEnd = commentsUint32[(i << 2) + 1];
} else {
break;
}
const targetEnd = nodeOrToken.range[1];

// Binary search merged buffer for first entry at or after target's end.
const searchIndex = firstTokenAtOrAfter(
tokensAndCommentsUint32,
targetEnd,
0,
tokensAndCommentsLen,
);

// Walk forwards over consecutive comments.
// Operate in pos32 space: `typePos32` points directly at the type field, incrementing by 4 each step,
// instead of recomputing `(i << 2) + 2` per iteration.
// No explicit bounds check is needed - a sentinel `MERGED_TYPE_TOKEN` entry is written after the last
// valid entry in `initTokensAndCommentsBuffer`, so the loop terminates naturally.
const startTypePos32 = (searchIndex << MERGED_SIZE32_SHIFT) + MERGED_TYPE_OFFSET32;
let typePos32 = startTypePos32;
while (tokensAndCommentsUint32[typePos32] !== MERGED_TYPE_TOKEN) {
typePos32 += MERGED_SIZE32;
}

// Deserialize only the comments we're returning
const count32 = typePos32 - startTypePos32;
if (count32 === 0) return [];

// Read `originalIndex` of earliest comment, calculate slice end from how far we walked
const sliceStart =
tokensAndCommentsUint32[
startTypePos32 - (MERGED_TYPE_OFFSET32 - MERGED_ORIGINAL_INDEX_OFFSET32)
];
const sliceEnd = sliceStart + (count32 >> MERGED_SIZE32_SHIFT);

for (let i = sliceStart; i < sliceEnd; i++) {
deserializeCommentIfNeeded(i);
}
Expand Down
19 changes: 13 additions & 6 deletions apps/oxlint/src-js/plugins/tokens_and_comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,18 @@ export type TokenOrComment = Token | Comment;
//
// These constants define the shape of the data stored in `tokensAndCommentsUint32` as per the above.
const MERGED_SIZE = 16;
const MERGED_SIZE32_SHIFT = 2; // 4 x u32s per entry (16 bytes)
const MERGED_SIZE32 = 1 << MERGED_SIZE32_SHIFT; // 4 x u32s per entry
export const MERGED_SIZE32_SHIFT = 2; // 4 x u32s per entry (16 bytes)
export const MERGED_SIZE32 = 1 << MERGED_SIZE32_SHIFT; // 4 x u32s per entry
debugAssert(MERGED_SIZE === MERGED_SIZE32 * 4);
debugAssert(MERGED_SIZE === COMMENT_SIZE, "Size of token, comment, and merged entry must be equal");

const MERGED_ORIGINAL_INDEX_OFFSET32 = 1; // u32 index of the `original_index` field within an entry
const MERGED_TYPE_OFFSET32 = 2; // u32 index of the `is_comment` field within an entry
export const MERGED_ORIGINAL_INDEX_OFFSET32 = 1; // u32 index of the `original_index` field within an entry
export const MERGED_TYPE_OFFSET32 = 2; // u32 index of the `is_comment` field within an entry

// Type of merged entry.
// "Poor man's enum" which optimizes better than a TS enum.
type MergedType = typeof MERGED_TYPE_TOKEN | typeof MERGED_TYPE_COMMENT;
const MERGED_TYPE_TOKEN = 0;
export const MERGED_TYPE_TOKEN = 0;
const MERGED_TYPE_COMMENT = 1;

// Cached `tokensAndComments` array, returned by `getTokensAndComments`.
Expand Down Expand Up @@ -125,7 +125,8 @@ export function initTokensAndCommentsBuffer(): void {
// for all files, so we avoid allocating a large buffer each time.
// `Uint32Array`s can't grow in place, so allocate a new one.
// First allocation uses minimum capacity. Subsequent growths double, to avoid frequent reallocations.
const requiredLen32 = tokensAndCommentsLen << MERGED_SIZE32_SHIFT;
// +1 entry for sentinel (see below).
const requiredLen32 = (tokensAndCommentsLen + 1) << MERGED_SIZE32_SHIFT;
const backingLen = tokensAndCommentsBackingUint32.length;
if (backingLen < requiredLen32) {
tokensAndCommentsBackingUint32 = new Uint32Array(
Expand All @@ -144,6 +145,12 @@ export function initTokensAndCommentsBuffer(): void {
mergeTokensAndComments(tokensUint32, commentsUint32);
}

// Write a sentinel `MERGED_TYPE_TOKEN` entry immediately after the last valid entry.
// This allows `getCommentsAfter`'s forward walk to terminate without an explicit bounds check
// against `tokensAndCommentsLen` on every iteration - the sentinel acts as a natural stop.
tokensAndCommentsUint32[(tokensAndCommentsLen << MERGED_SIZE32_SHIFT) + MERGED_TYPE_OFFSET32] =
MERGED_TYPE_TOKEN;

debugCheckMergedOrder();
}

Expand Down
Loading