diff --git a/apps/oxlint/src-js/plugins/context.ts b/apps/oxlint/src-js/plugins/context.ts index a3737afd4cfa9..0a88a5fa0bfbc 100644 --- a/apps/oxlint/src-js/plugins/context.ts +++ b/apps/oxlint/src-js/plugins/context.ts @@ -1,5 +1,6 @@ import { getFixes } from './fix.js'; -import { getIndexFromLoc, SOURCE_CODE } from './source_code.js'; +import { getOffsetFromLineColumn } from './location.js'; +import { SOURCE_CODE } from './source_code.js'; import type { Fix, FixFn } from './fix.ts'; import type { SourceCode } from './source_code.ts'; @@ -148,8 +149,8 @@ export class Context { if (hasOwn(diagnostic, 'loc') && (loc = (diagnostic as DiagnosticWithLoc).loc) != null) { // `loc` if (typeof loc !== 'object') throw new TypeError('`loc` must be an object'); - start = getIndexFromLoc(loc.start); - end = getIndexFromLoc(loc.end); + start = getOffsetFromLineColumn(loc.start); + end = getOffsetFromLineColumn(loc.end); } else { // `node` const { node } = diagnostic as DiagnosticWithNode; diff --git a/apps/oxlint/src-js/plugins/location.ts b/apps/oxlint/src-js/plugins/location.ts new file mode 100644 index 0000000000000..a24fa270eead4 --- /dev/null +++ b/apps/oxlint/src-js/plugins/location.ts @@ -0,0 +1,140 @@ +import { initSourceText, sourceText } from './source_code.js'; + +import type { LineColumn } from './types.ts'; + +// Pattern for splitting source text into lines +const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu; + +// Lazily populated when `SOURCE_CODE.lines` is accessed. +// `lineStartOffsets` starts as `[0]`, and `resetLines` doesn't remove that initial element, so it's never empty. +export const lines: string[] = []; +const lineStartOffsets: number[] = [0]; + +/** + * Split source text into lines. + */ +export function initLines(): void { + if (sourceText === null) initSourceText(); + + // This implementation is based on the one in ESLint. + // TODO: Investigate if using `String.prototype.matchAll` is faster. + // This comment is above ESLint's implementation: + /* + * Previously, this was implemented using a regex that + * matched a sequence of non-linebreak characters followed by a + * linebreak, then adding the lengths of the matches. However, + * this caused a catastrophic backtracking issue when the end + * of a file contained a large number of non-newline characters. + * To avoid this, the current implementation just matches newlines + * and uses match.index to get the correct line start indices. + */ + + // Note: `lineStartOffsets` starts as `[0]` + let lastOffset = 0, offset, match; + while ((match = LINE_BREAK_PATTERN.exec(sourceText))) { + offset = match.index; + lines.push(sourceText.slice(lastOffset, offset)); + lineStartOffsets.push(lastOffset = offset + match[0].length); + } + lines.push(sourceText.slice(lastOffset)); +} + +/** + * Reset lines after file has been linted, to free memory. + */ +export function resetLines(): void { + lines.length = 0; + // Leave first entry (0) in place, discard the rest + lineStartOffsets.length = 1; +} + +/** + * Convert a source text index into a (line, column) pair. + * @param offset - The index of a character in a file. + * @returns `{line, column}` location object with 1-indexed line and 0-indexed column. + * @throws {TypeError|RangeError} If non-numeric `offset`, or `offset` out of range. + */ +export function getLineColumnFromOffset(offset: number): LineColumn { + if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) { + throw new TypeError('Expected `offset` to be a non-negative integer.'); + } + + // Build `lines` and `lineStartOffsets` tables if they haven't been already. + // This also decodes `sourceText` if it wasn't already. + if (lines.length === 0) initLines(); + + if (offset > sourceText.length) { + throw new RangeError( + `Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`, + ); + } + + // Binary search `lineStartOffsets` for the line containing `offset` + let low = 0, high = lineStartOffsets.length, mid: number; + do { + mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division + if (offset < lineStartOffsets[mid]) { + high = mid; + } else { + low = mid + 1; + } + } while (low < high); + + return { line: low, column: offset - lineStartOffsets[low - 1] }; +} + +/** + * Convert a `{ line, column }` pair into a range index. + * @param loc - A line/column location. + * @returns The character index of the location in the file. + * @throws {TypeError|RangeError} If `loc` is not an object with a numeric `line` and `column`, + * or if the `line` is less than or equal to zero, or the line or column is out of the expected range. + */ +export function getOffsetFromLineColumn(loc: LineColumn): number { + if (loc !== null && typeof loc === 'object') { + const { line, column } = loc; + if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) { + // Build `lines` and `lineStartOffsets` tables if they haven't been already. + // This also decodes `sourceText` if it wasn't already. + if (lines.length === 0) initLines(); + + const linesCount = lineStartOffsets.length; + if (line <= 0 || line > linesCount) { + throw new RangeError( + `Line number out of range (line ${line} requested). ` + + `Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`, + ); + } + if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`); + + const lineOffset = lineStartOffsets[line - 1]; + const offset = lineOffset + column; + + // Comment from ESLint implementation: + /* + * By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of + * the given line, provided that the line number is valid element of `lines`. Since the + * last element of `lines` is an empty string for files with trailing newlines, add a + * special case where getting the index for the first location after the end of the file + * will return the length of the file, rather than throwing an error. This allows rules to + * use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file. + */ + + let nextLineOffset; + if (line === linesCount) { + nextLineOffset = sourceText.length; + if (offset <= nextLineOffset) return offset; + } else { + nextLineOffset = lineStartOffsets[line]; + if (offset < nextLineOffset) return offset; + } + + throw new RangeError( + `Column number out of range (column ${column} requested, ` + + `but the length of line ${line} is ${nextLineOffset - lineOffset}).`, + ); + } + } + + throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.'); +} diff --git a/apps/oxlint/src-js/plugins/source_code.ts b/apps/oxlint/src-js/plugins/source_code.ts index ae848bcbefe09..6902118e50897 100644 --- a/apps/oxlint/src-js/plugins/source_code.ts +++ b/apps/oxlint/src-js/plugins/source_code.ts @@ -8,18 +8,16 @@ import { // @ts-expect-error we need to generate `.d.ts` file for this module // We use the deserializer which removes `ParenthesizedExpression`s from AST to match ESLint import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts_range_parent_no_parens.js'; +import { getLineColumnFromOffset, getOffsetFromLineColumn, initLines, lines, resetLines } from './location.js'; import type { Program } from '@oxc-project/types'; import type { Scope, ScopeManager, Variable } from './scope.ts'; -import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts'; +import type { BufferWithArrays, Comment, Node, NodeOrToken, Token } from './types.ts'; const require = createRequire(import.meta.url); const { max } = Math; -// Pattern for splitting source text into lines -const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu; - // Text decoder, for decoding source text from buffer const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true }); @@ -31,15 +29,10 @@ let hasBOM = false; // Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed, // or `initAst()` is called before the AST is walked. -let sourceText: string | null = null; +export let sourceText: string | null = null; let sourceByteLen: number = 0; export let ast: Program | null = null; -// Lazily populated when `SOURCE_CODE.lines` is accessed. -// `lineStartOffsets` starts as `[0]`, and `resetSource` doesn't remove that initial element, so it's never empty. -const lines: string[] = [], - lineStartOffsets: number[] = [0]; - // Lazily populated when `SOURCE_CODE.visitorKeys` is accessed. let visitorKeys: { [key: string]: string[] } | null = null; @@ -56,7 +49,7 @@ export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: b /** * Decode source text from buffer. */ -function initSourceText(): void { +export function initSourceText(): void { const { uint32 } = buffer, programPos = uint32[DATA_POINTER_POS_32]; sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2]; @@ -71,35 +64,6 @@ export function initAst(): void { ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen); } -/** - * Split source text into lines. - */ -function initLines(): void { - if (sourceText === null) initSourceText(); - - // This implementation is based on the one in ESLint. - // TODO: Investigate if using `String.prototype.matchAll` is faster. - // This comment is above ESLint's implementation: - /* - * Previously, this was implemented using a regex that - * matched a sequence of non-linebreak characters followed by a - * linebreak, then adding the lengths of the matches. However, - * this caused a catastrophic backtracking issue when the end - * of a file contained a large number of non-newline characters. - * To avoid this, the current implementation just matches newlines - * and uses match.index to get the correct line start indices. - */ - - // Note: `lineStartOffsets` starts as `[0]` - let lastOffset = 0, offset, match; - while ((match = LINE_BREAK_PATTERN.exec(sourceText))) { - offset = match.index; - lines.push(sourceText.slice(lastOffset, offset)); - lineStartOffsets.push(lastOffset = offset + match[0].length); - } - lines.push(sourceText.slice(lastOffset)); -} - /** * Reset source after file has been linted, to free memory. * @@ -114,8 +78,7 @@ export function resetSource(): void { buffer = null; sourceText = null; ast = null; - lines.length = 0; - lineStartOffsets.length = 1; + resetLines(); } // `SourceCode` object. @@ -495,8 +458,8 @@ export const SOURCE_CODE = Object.freeze({ throw new Error('`sourceCode.getNodeByRangeIndex` not implemented yet'); // TODO }, - getLocFromIndex, - getIndexFromLoc, + getLocFromIndex: getLineColumnFromOffset, + getIndexFromLoc: getOffsetFromLineColumn, /** * Check whether any comments exist or not between the given 2 nodes. @@ -546,97 +509,6 @@ export const SOURCE_CODE = Object.freeze({ export type SourceCode = typeof SOURCE_CODE; -/** - * Convert a source text index into a (line, column) pair. - * @param offset The index of a character in a file. - * @returns `{line, column}` location object with 1-indexed line and 0-indexed column. - * @throws {TypeError|RangeError} If non-numeric `index`, or `index` out of range. - */ -function getLocFromIndex(offset: number): LineColumn { - if (typeof offset !== 'number' || offset < 0 || (offset | 0) !== offset) { - throw new TypeError('Expected `offset` to be a non-negative integer.'); - } - - // Build `lines` and `lineStartOffsets` tables if they haven't been already. - // This also decodes `sourceText` if it wasn't already. - if (lines.length === 0) initLines(); - - if (offset > sourceText.length) { - throw new RangeError( - `Index out of range (requested index ${offset}, but source text has length ${sourceText.length}).`, - ); - } - - // Binary search `lineStartOffsets` for the line containing `offset` - let low = 0, high = lineStartOffsets.length, mid: number; - do { - mid = ((low + high) / 2) | 0; // Use bitwise OR to floor the division - if (offset < lineStartOffsets[mid]) { - high = mid; - } else { - low = mid + 1; - } - } while (low < high); - - return { line: low, column: offset - lineStartOffsets[low - 1] }; -} - -/** - * Convert a `{ line, column }` pair into a range index. - * @param loc - A line/column location. - * @returns The range index of the location in the file. - * @throws {TypeError|RangeError} If `loc` is not an object with a numeric `line` and `column`, - * or if the `line` is less than or equal to zero, or the line or column is out of the expected range. - */ -export function getIndexFromLoc(loc: LineColumn): number { - if (loc !== null && typeof loc === 'object') { - const { line, column } = loc; - if (typeof line === 'number' && typeof column === 'number' && (line | 0) === line && (column | 0) === column) { - // Build `lines` and `lineStartOffsets` tables if they haven't been already. - // This also decodes `sourceText` if it wasn't already. - if (lines.length === 0) initLines(); - - const linesCount = lineStartOffsets.length; - if (line <= 0 || line > linesCount) { - throw new RangeError( - `Line number out of range (line ${line} requested). ` + - `Line numbers should be 1-based, and less than or equal to number of lines in file (${linesCount}).`, - ); - } - if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`); - - const lineOffset = lineStartOffsets[line - 1]; - const offset = lineOffset + column; - - // Comment from ESLint implementation: - /* - * By design, `getIndexFromLoc({ line: lineNum, column: 0 })` should return the start index of - * the given line, provided that the line number is valid element of `lines`. Since the - * last element of `lines` is an empty string for files with trailing newlines, add a - * special case where getting the index for the first location after the end of the file - * will return the length of the file, rather than throwing an error. This allows rules to - * use `getIndexFromLoc` consistently without worrying about edge cases at the end of a file. - */ - - let nextLineOffset; - if (line === linesCount) { - nextLineOffset = sourceText.length; - if (offset <= nextLineOffset) return offset; - } else { - nextLineOffset = lineStartOffsets[line]; - if (offset < nextLineOffset) return offset; - } - - throw new RangeError( - `Column number out of range (column ${column} requested, ` + - `but the length of line ${line} is ${nextLineOffset - lineOffset}).`, - ); - } - } - - throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.'); -} - /** * Get all the ancestors of a given node. * @param node - AST node