Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions apps/oxlint/src-js/generated/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ export const COMMENT_SIZE = 16;
*/
export const COMMENT_KIND_OFFSET = 12;

/**
 * Byte offset of the deserialized flag within each token/comment entry.
 *
 * Corresponds to `content` field of `Comment` struct, and unused bytes in `Token`.
 * For comments, this is the last byte of each 16-byte (`COMMENT_SIZE`) entry.
 * Initialized to 0 by Rust. JS side sets to 1 after deserialization.
 */
export const DESERIALIZED_FLAG_OFFSET = 15;

/**
* Discriminant value for `CommentKind::Line`.
*/
Expand Down
235 changes: 186 additions & 49 deletions apps/oxlint/src-js/plugins/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ import {
COMMENT_KIND_OFFSET,
COMMENT_LINE_KIND,
DATA_POINTER_POS_32,
DESERIALIZED_FLAG_OFFSET,
} from "../generated/constants.ts";
import { computeLoc } from "./location.ts";
import { FLAG_NOT_DESERIALIZED, FLAG_DESERIALIZED } from "./tokens.ts";
import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts";

import type { Location, Span } from "./location.ts";
Expand All @@ -31,9 +33,20 @@ export type { CommentType as Comment };
// Created lazily only when needed.
export let comments: CommentType[] | null = null;

// Typed array views over the comments region of the buffer.
// These persist for the lifetime of the file (cleared in `resetComments`).
// `null` until `initCommentsBuffer` has run for the current file.
// `commentsUint8` is read for the per-comment kind byte and deserialized-flag byte,
// `commentsUint32` for each comment's `start` and `end`.
let commentsUint8: Uint8Array | null = null;
export let commentsUint32: Uint32Array | null = null;

// Number of comments for the current file.
// Set by `initCommentsBuffer`, reset to 0 in `resetComments`.
export let commentsLen = 0;

// Whether all comments have been deserialized into `cachedComments`.
// Set to `true` by `deserializeComments`, or by `initCommentsBuffer` when the file has no comments.
export let allCommentsDeserialized = false;

// Cached comment objects, reused across files to reduce GC pressure.
// Comments are mutated in place during deserialization, then `comments` is set to a slice of this array.
const cachedComments: Comment[] = [];
export const cachedComments: Comment[] = [];

// Comments array from previous file.
// Reused for next file if next file has fewer comments than the previous file (by truncating to correct length).
Expand All @@ -44,7 +57,14 @@ const commentsWithLoc: Comment[] = [];

// Empty comments array.
// Reused for all files which don't have any comments. Frozen to avoid rules mutating it.
const emptyComments: CommentType[] = Object.freeze([]) as unknown as CommentType[];
const EMPTY_COMMENTS: CommentType[] = Object.freeze([]) as unknown as CommentType[];

// Empty typed arrays, reused for files with no comments.
// Assigned to `commentsUint8` / `commentsUint32` by `initCommentsBuffer` in that case,
// so the views are non-null after initialization even when there is nothing to view.
const EMPTY_UINT8_ARRAY = new Uint8Array(0);
const EMPTY_UINT32_ARRAY = new Uint32Array(0);

// Shift which converts a comment index into its byte offset within the comments views
// (`index << COMMENT_SIZE_SHIFT`).
const COMMENT_SIZE_SHIFT = 4; // 1 << 4 == 16 bytes, the size of `Comment` in Rust
// Guard against `COMMENT_SIZE` changing without this shift being updated to match.
debugAssert(COMMENT_SIZE === 1 << COMMENT_SIZE_SHIFT);

// Reset `#loc` field on a `Comment` class instance.
let resetCommentLoc: (comment: Comment) => void;
Expand Down Expand Up @@ -86,92 +106,177 @@ class Comment implements Span {
Object.defineProperty(Comment.prototype, "loc", { enumerable: true });

/**
 * Build the `comments` array for the current file, deserializing all comments first if that
 * has not already happened.
 * Called by `ast.comments` getter.
 *
 * After this returns, `comments` is one of:
 * - the value already set during `deserializeComments()` (zero-comment files),
 * - the previous file's array truncated to `commentsLen`, or
 * - a fresh slice of `cachedComments`.
 */
export function initComments(): void {
debugAssert(comments === null, "Comments already initialized");
debugAssert(comments === null, "Comments already deserialized");

// Individual comments may already have been deserialized lazily; this fills in the rest
if (!allCommentsDeserialized) deserializeComments();

// `initCommentsBuffer` (called by `deserializeComments`) sets `comments` for zero-comment files
if (comments !== null) return;

// Create `comments` array as a slice of `cachedComments` array.
//
// Use `slice` rather than copying comments one-by-one into a new array.
// V8 implements `slice` with a single `memcpy` of the backing store, which is faster
// than N individual `push` calls with bounds checking and potential resizing.
//
// If the comments array from previous file is longer than the current one,
// reuse it and truncate it to avoid the memcpy entirely.
// Assuming random distribution of number of comments in files, this cheaper branch should be hit on 50% of files.
if (previousComments.length >= commentsLen) {
previousComments.length = commentsLen;
comments = previousComments;
} else {
// Remember the new array as `previousComments` so the next file can reuse it
comments = previousComments = cachedComments.slice(0, commentsLen);
}
}

/**
 * Eagerly deserialize every comment of the current file into `cachedComments`.
 *
 * Sets up the comments buffer views first if they have not been created yet.
 * Only fills `cachedComments` - the `comments` array itself is built by `initComments`.
 */
export function deserializeComments(): void {
  debugAssert(!allCommentsDeserialized, "Comments already deserialized");

  // Buffer views are created lazily - make sure they exist before reading from them
  if (commentsUint32 === null) {
    initCommentsBuffer();
  }

  // Comments which were already deserialized individually are skipped by the flag check
  let index = 0;
  while (index < commentsLen) {
    deserializeCommentIfNeeded(index);
    index += 1;
  }

  allCommentsDeserialized = true;

  debugCheckDeserializedComments();
}

/**
* Initialize typed array views over the comments region of the buffer.
*
* Populates `commentsUint8`, `commentsUint32`, and `commentsLen`, and grows `cachedComments` if needed.
* Does NOT deserialize comments - they are deserialized lazily via `deserializeCommentIfNeeded`.
*
* Exception: If the file has a hashbang, eagerly deserializes the first comment and sets its type to `Shebang`.
*/
export function initCommentsBuffer(): void {
debugAssert(
commentsUint8 === null && commentsUint32 === null,
"Comments buffer already initialized",
);

debugAssertIsNonNull(buffer);

// We don't need to deserialize source text if there are no comments, so we could move this call to after
// the `commentsLen === 0` check. However, various comments methods rely on that if `initComments` has been called,
// then `sourceText` is initialized. Doing it eagerly here avoids having to check if `sourceText` is `null`
// in all those methods, which can be called quite frequently.
if (sourceText === null) initSourceText();
debugAssertIsNonNull(sourceText);
debugAssertIsNonNull(buffer);

const { uint32 } = buffer;
const programPos32 = uint32[DATA_POINTER_POS_32] >> 2;
const commentsPos = uint32[programPos32 + (COMMENTS_OFFSET >> 2)];
const commentsLen = uint32[programPos32 + (COMMENTS_LEN_OFFSET >> 2)];
commentsLen = uint32[programPos32 + (COMMENTS_LEN_OFFSET >> 2)];

// Fast path for files with no comments
if (commentsLen === 0) {
comments = emptyComments;
comments = EMPTY_COMMENTS;
commentsUint8 = EMPTY_UINT8_ARRAY;
commentsUint32 = EMPTY_UINT32_ARRAY;
allCommentsDeserialized = true;
return;
}

// Create typed array views over the comments region of the buffer.
// These are zero-copy views over the same underlying `ArrayBuffer`.
const arrayBuffer = buffer.buffer,
absolutePos = buffer.byteOffset + commentsPos;
commentsUint8 = new Uint8Array(arrayBuffer, absolutePos, commentsLen * COMMENT_SIZE);
commentsUint32 = new Uint32Array(arrayBuffer, absolutePos, commentsLen * (COMMENT_SIZE >> 2));

// Grow cache if needed (one-time cost as cache warms up)
while (cachedComments.length < commentsLen) {
cachedComments.push(new Comment());
}

// Deserialize comments from buffer
for (let i = 0; i < commentsLen; i++) {
const comment = cachedComments[i];
// If file has a hashbang, eagerly deserialize the first comment, and set its type to `Shebang`.
// We do this here instead of lazily when comment 0 is deserialized, to remove code
// from `deserializeCommentIfNeeded`, which can be called many times.
// Rust side adds hashbang comment to start of comments `Vec` as a `Line` comment.
// `commentsUint32[0]` is the start of the first comment.
if (commentsUint32[0] === 0 && sourceText.startsWith("#!")) {
getComment(0).type = "Shebang";
}

const pos = commentsPos + i * COMMENT_SIZE,
pos32 = pos >> 2;
// Check buffer data has valid ranges and ascending order
debugCheckValidRanges();
}

const start = uint32[pos32];
const end = uint32[pos32 + 1];
const isBlock = buffer[pos + COMMENT_KIND_OFFSET] !== COMMENT_LINE_KIND;
/**
 * Return the comment at `index`, deserializing it first if that has not happened yet.
 *
 * `initCommentsBuffer()` must have been called before this function - that is the caller's responsibility.
 *
 * @param index - Comment index in the comments buffer
 * @returns Deserialized comment
 */
export function getComment(index: number): CommentType {
  const freshlyDeserialized = deserializeCommentIfNeeded(index);
  // Non-null means the comment was deserialized just now, so it can be returned directly
  if (freshlyDeserialized !== null) return freshlyDeserialized;
  // `null` means it was deserialized earlier - the object is already in the cache
  return cachedComments[index];
}

comment.type = isBlock ? "Block" : "Line";
// Line comments: `// text` -> slice `start + 2..end`
// Block comments: `/* text */` -> slice `start + 2..end - 2`
comment.value = sourceText.slice(start + 2, end - (+isBlock << 1));
comment.range[0] = comment.start = start;
comment.range[1] = comment.end = end;
}
/**
* Deserialize comment at `index` if not already deserialized.
*
* Caller must ensure `initCommentsBuffer()` has been called before calling this function.
*
* @param index - Comment index in the comments buffer
* @returns `Comment` object if newly deserialized, or `null` if already deserialized
*/
export function deserializeCommentIfNeeded(index: number): Comment | null {
const pos = index << COMMENT_SIZE_SHIFT;

// Set first comment as `Shebang` if file has hashbang.
// Rust side adds hashbang comment to start of comments `Vec` as a `Line` comment.
// `uint32[commentsPos >> 2]` is the start of the first comment.
if (uint32[commentsPos >> 2] === 0 && sourceText.startsWith("#!")) {
cachedComments[0].type = "Shebang";
}
// Fast path: If already deserialized, exit
const flagPos = pos + DESERIALIZED_FLAG_OFFSET;
if (commentsUint8![flagPos] !== FLAG_NOT_DESERIALIZED) return null;

// Use `slice` rather than copying comments one-by-one into a new array.
// V8 implements `slice` with a single `memcpy` of the backing store, which is faster
// than N individual `push` calls with bounds checking and potential resizing.
//
// If the comments array from previous file is longer than the current one,
// reuse it and truncate it to avoid the memcpy entirely.
if (previousComments.length >= commentsLen) {
previousComments.length = commentsLen;
comments = previousComments;
} else {
comments = previousComments = cachedComments.slice(0, commentsLen);
}
// Mark comment as deserialized, so it won't be deserialized again
commentsUint8![flagPos] = FLAG_DESERIALIZED;

// Deserialize comment into a cached `Comment` object
const comment = cachedComments[index];

const isBlock = commentsUint8![pos + COMMENT_KIND_OFFSET] !== COMMENT_LINE_KIND;

// Check `comments` have valid ranges and are in ascending order
debugCheckValidRanges(comments);
const pos32 = pos >> 2,
start = commentsUint32![pos32],
end = commentsUint32![pos32 + 1];

comment.type = isBlock ? "Block" : "Line";
// Line comments: `// text` -> slice `start + 2..end`
// Block comments: `/* text */` -> slice `start + 2..end - 2`
comment.value = sourceText!.slice(start + 2, end - (+isBlock << 1));
comment.range[0] = comment.start = start;
comment.range[1] = comment.end = end;

return comment;
}

/**
* Check comments have valid ranges and are in ascending order.
* Check comments buffer has valid ranges and ascending order.
*
* Only runs in debug build (tests). In release build, this function is entirely removed by minifier.
*/
function debugCheckValidRanges(commentsArr: CommentType[]): void {
function debugCheckValidRanges(): void {
if (!DEBUG) return;

let lastEnd = 0;
for (const comment of commentsArr) {
const { start, end } = comment;
for (let i = 0; i < commentsLen; i++) {
const pos32 = i << 2;
const start = commentsUint32![pos32];
const end = commentsUint32![pos32 + 1];
if (end <= start) throw new Error(`Invalid comment range: ${start}-${end}`);
if (start < lastEnd) {
throw new Error(`Overlapping comments: last end: ${lastEnd}, next start: ${start}`);
Expand All @@ -184,6 +289,34 @@ function debugCheckValidRanges(commentsArr: CommentType[]): void {
}
}

/**
 * Verify that every comment is flagged as deserialized, has a non-empty range,
 * and that the cached comments are in ascending order.
 *
 * Only runs in debug build (tests). In release build, this function is entirely removed by minifier.
 */
function debugCheckDeserializedComments(): void {
  if (!DEBUG) return;

  let prevEnd = 0;
  let index = 0;
  while (index < commentsLen) {
    // The flag byte lives at a fixed offset within each comment's entry
    const flag = commentsUint8![(index << COMMENT_SIZE_SHIFT) + DESERIALIZED_FLAG_OFFSET];
    if (flag !== FLAG_DESERIALIZED) {
      throw new Error(
        `Comment ${index} not marked as deserialized after \`deserializeComments()\` call`,
      );
    }

    const cached = cachedComments[index];
    const start = cached.start,
      end = cached.end;

    if (start >= end) {
      throw new Error(`Invalid deserialized comment range: ${start}-${end}`);
    }
    if (prevEnd > start) {
      throw new Error(
        `Deserialized comments not in order: last end: ${prevEnd}, next start: ${start}`,
      );
    }

    prevEnd = end;
    index++;
  }
}

/**
* Reset comments after file has been linted.
*
Expand All @@ -197,4 +330,8 @@ export function resetComments(): void {
commentsWithLoc.length = 0;

comments = null;
commentsUint8 = null;
commentsUint32 = null;
commentsLen = 0;
allCommentsDeserialized = false;
}
Loading
Loading