Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions apps/oxlint/src-js/plugins/context.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { getFixes } from './fix.js';
import { getIndexFromLoc, SOURCE_CODE } from './source_code.js';
import { getOffsetFromLineColumn } from './location.js';
import { SOURCE_CODE } from './source_code.js';

import type { Fix, FixFn } from './fix.ts';
import type { SourceCode } from './source_code.ts';
Expand Down Expand Up @@ -148,8 +149,8 @@ export class Context {
if (hasOwn(diagnostic, 'loc') && (loc = (diagnostic as DiagnosticWithLoc).loc) != null) {
// `loc`
if (typeof loc !== 'object') throw new TypeError('`loc` must be an object');
start = getIndexFromLoc(loc.start);
end = getIndexFromLoc(loc.end);
start = getOffsetFromLineColumn(loc.start);
end = getOffsetFromLineColumn(loc.end);
} else {
// `node`
const { node } = diagnostic as DiagnosticWithNode;
Expand Down
140 changes: 140 additions & 0 deletions apps/oxlint/src-js/plugins/location.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import { initSourceText, sourceText } from './source_code.js';

import type { LineColumn } from './types.ts';

// Pattern for splitting source text into lines.
// Matches `\r\n` as a single line break (tried first), otherwise a lone `\r`, `\n`, U+2028 or U+2029.
// The `g` flag matters: `initLines` calls `exec` repeatedly on this regex to step through every line break.
const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;

// Lazily populated when `SOURCE_CODE.lines` is accessed.
// `getLineColumnFromOffset` and `getOffsetFromLineColumn` also populate them (via `initLines`)
// on first use, when `lines` is still empty.
// `lines[i]` is the text of the `i`th line (0-based) without its line break.
// `lineStartOffsets[i]` is the offset in source text at which that line starts.
// `lineStartOffsets` starts as `[0]`, and `resetLines` doesn't remove that initial element, so it's never empty.
export const lines: string[] = [];
const lineStartOffsets: number[] = [0];

/**
 * Populate `lines` and `lineStartOffsets` from the source text.
 *
 * Decodes the source text first if that has not happened yet.
 * Each line break found appends the text before it to `lines`, and the offset just past it
 * to `lineStartOffsets`. The text after the final line break (possibly an empty string)
 * becomes the last entry of `lines`.
 */
export function initLines(): void {
  if (sourceText === null) initSourceText();

  // This follows ESLint's approach: search only for the line breaks themselves and use each
  // match's index to derive line boundaries. ESLint notes that an earlier regex, which matched
  // a run of non-linebreak characters followed by a linebreak, suffered catastrophic backtracking
  // when a file ended with a large number of non-newline characters.
  // TODO: Investigate if using `String.prototype.matchAll` is faster.

  // `lineStartOffsets` already holds the `0` entry for the first line.
  let lineStart = 0;
  for (;;) {
    const lineBreak = LINE_BREAK_PATTERN.exec(sourceText);
    if (!lineBreak) break;

    const breakStart = lineBreak.index;
    lines.push(sourceText.slice(lineStart, breakStart));

    // The next line begins immediately after the matched line break sequence
    lineStart = breakStart + lineBreak[0].length;
    lineStartOffsets.push(lineStart);
  }

  // Remainder after the last line break
  lines.push(sourceText.slice(lineStart));
}

/**
 * Discard the per-file line tables once a file has been linted, so their memory can be reclaimed.
 */
export function resetLines(): void {
  // Truncate to the single leading `0` entry, so `lineStartOffsets` is never empty
  lineStartOffsets.length = 1;
  // Drop every line string
  lines.length = 0;
}

/**
 * Translate an offset in the source text into a line/column location.
 * @param offset - The index of a character in a file. An offset equal to the source text length is accepted.
 * @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
 * @throws {TypeError} If `offset` is not a number, is negative, or is not an integer
 *   (as determined by `(offset | 0) === offset`).
 * @throws {RangeError} If `offset` is greater than the length of the source text.
 */
export function getLineColumnFromOffset(offset: number): LineColumn {
  const isValidOffset = typeof offset === 'number' && offset >= 0 && (offset | 0) === offset;
  if (!isValidOffset) {
    throw new TypeError('Expected `offset` to be a non-negative integer.');
  }

  // Make sure the `lines` / `lineStartOffsets` tables exist.
  // `initLines` also decodes `sourceText` if that hasn't been done yet.
  if (lines.length === 0) initLines();

  const sourceLength = sourceText.length;
  if (offset > sourceLength) {
    throw new RangeError(
      `Index out of range (requested index ${offset}, but source text has length ${sourceLength}).`,
    );
  }

  // Binary search for the number of line starts which are `<= offset`.
  // That count is the 1-based line number. `lineStartOffsets` always contains at least
  // the initial `0` entry, so the search range is never empty and the result is at least 1.
  let lower = 0;
  let upper = lineStartOffsets.length;
  while (lower < upper) {
    const middle = (lower + upper) >> 1; // Shift floors the halved sum
    if (lineStartOffsets[middle] <= offset) {
      lower = middle + 1;
    } else {
      upper = middle;
    }
  }

  const lineStart = lineStartOffsets[lower - 1];
  return { line: lower, column: offset - lineStart };
}

/**
 * Translate a `{ line, column }` location into an offset in the source text.
 * @param loc - A line/column location (1-indexed line, 0-indexed column).
 * @returns The character index of the location in the file.
 * @throws {TypeError} If `loc` is not a non-null object whose `line` and `column` are both integers
 *   (as determined by `(n | 0) === n`).
 * @throws {RangeError} If `line` is less than 1 or greater than the number of lines,
 *   if `column` is negative, or if `column` points past the end of the line.
 */
export function getOffsetFromLineColumn(loc: LineColumn): number {
  if (loc === null || typeof loc !== 'object') {
    throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
  }

  const { line, column } = loc;
  if (typeof line !== 'number' || typeof column !== 'number' || (line | 0) !== line || (column | 0) !== column) {
    throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
  }

  // Make sure the `lines` / `lineStartOffsets` tables exist.
  // `initLines` also decodes `sourceText` if that hasn't been done yet.
  if (lines.length === 0) initLines();

  const lineCount = lineStartOffsets.length;
  if (line < 1 || line > lineCount) {
    throw new RangeError(
      `Line number out of range (line ${line} requested). ` +
        `Line numbers should be 1-based, and less than or equal to number of lines in file (${lineCount}).`,
    );
  }
  if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);

  const lineStart = lineStartOffsets[line - 1];
  const target = lineStart + column;

  // Same end-of-file rule as ESLint's `getIndexFromLoc`: on the last line, the position
  // immediately after the final character (i.e. the length of the file) is a valid result,
  // so rules don't need to special-case the end of the file. On every other line, the offset
  // must fall before the start of the following line.
  const isLastLine = line === lineCount;
  const lineEnd = isLastLine ? sourceText.length : lineStartOffsets[line];
  const isInRange = isLastLine ? target <= lineEnd : target < lineEnd;
  if (isInRange) return target;

  throw new RangeError(
    `Column number out of range (column ${column} requested, ` +
      `but the length of line ${line} is ${lineEnd - lineStart}).`,
  );
}
142 changes: 7 additions & 135 deletions apps/oxlint/src-js/plugins/source_code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,16 @@ import {
// @ts-expect-error we need to generate `.d.ts` file for this module
// We use the deserializer which removes `ParenthesizedExpression`s from AST to match ESLint
import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts_range_parent_no_parens.js';
import { getLineColumnFromOffset, getOffsetFromLineColumn, initLines, lines, resetLines } from './location.js';

import type { Program } from '@oxc-project/types';
import type { Scope, ScopeManager, Variable } from './scope.ts';
import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts';
import type { BufferWithArrays, Comment, Node, NodeOrToken, Token } from './types.ts';

const require = createRequire(import.meta.url);

const { max } = Math;

// Pattern for splitting source text into lines
const LINE_BREAK_PATTERN = /\r\n|[\r\n\u2028\u2029]/gu;

// Text decoder, for decoding source text from buffer
const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true });

Expand All @@ -31,15 +29,10 @@ let hasBOM = false;

// Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed,
// or `initAst()` is called before the AST is walked.
let sourceText: string | null = null;
export let sourceText: string | null = null;
let sourceByteLen: number = 0;
export let ast: Program | null = null;

// Lazily populated when `SOURCE_CODE.lines` is accessed.
// `lineStartOffsets` starts as `[0]`, and `resetSource` doesn't remove that initial element, so it's never empty.
const lines: string[] = [],
lineStartOffsets: number[] = [0];

// Lazily populated when `SOURCE_CODE.visitorKeys` is accessed.
let visitorKeys: { [key: string]: string[] } | null = null;

Expand All @@ -56,7 +49,7 @@ export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: b
/**
* Decode source text from buffer.
*/
function initSourceText(): void {
export function initSourceText(): void {
const { uint32 } = buffer,
programPos = uint32[DATA_POINTER_POS_32];
sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
Expand All @@ -71,35 +64,6 @@ export function initAst(): void {
ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen);
}

/**
 * Populate `lines` and `lineStartOffsets` from the source text.
 *
 * Decodes the source text first if that has not happened yet.
 * Each line break found appends the text before it to `lines`, and the offset just past it
 * to `lineStartOffsets`. The text after the final line break (possibly an empty string)
 * becomes the last entry of `lines`.
 */
function initLines(): void {
  if (sourceText === null) initSourceText();

  // This follows ESLint's approach: search only for the line breaks themselves and use each
  // match's index to derive line boundaries. ESLint notes that an earlier regex, which matched
  // a run of non-linebreak characters followed by a linebreak, suffered catastrophic backtracking
  // when a file ended with a large number of non-newline characters.
  // TODO: Investigate if using `String.prototype.matchAll` is faster.

  // `lineStartOffsets` already holds the `0` entry for the first line.
  let lineStart = 0;
  for (;;) {
    const lineBreak = LINE_BREAK_PATTERN.exec(sourceText);
    if (!lineBreak) break;

    const breakStart = lineBreak.index;
    lines.push(sourceText.slice(lineStart, breakStart));

    // The next line begins immediately after the matched line break sequence
    lineStart = breakStart + lineBreak[0].length;
    lineStartOffsets.push(lineStart);
  }

  // Remainder after the last line break
  lines.push(sourceText.slice(lineStart));
}

/**
* Reset source after file has been linted, to free memory.
*
Expand All @@ -114,8 +78,7 @@ export function resetSource(): void {
buffer = null;
sourceText = null;
ast = null;
lines.length = 0;
lineStartOffsets.length = 1;
resetLines();
}

// `SourceCode` object.
Expand Down Expand Up @@ -495,8 +458,8 @@ export const SOURCE_CODE = Object.freeze({
throw new Error('`sourceCode.getNodeByRangeIndex` not implemented yet'); // TODO
},

getLocFromIndex,
getIndexFromLoc,
getLocFromIndex: getLineColumnFromOffset,
getIndexFromLoc: getOffsetFromLineColumn,

/**
* Check whether any comments exist or not between the given 2 nodes.
Expand Down Expand Up @@ -546,97 +509,6 @@ export const SOURCE_CODE = Object.freeze({

export type SourceCode = typeof SOURCE_CODE;

/**
 * Translate an offset in the source text into a line/column location.
 * @param offset - The index of a character in a file. An offset equal to the source text length is accepted.
 * @returns `{line, column}` location object with 1-indexed line and 0-indexed column.
 * @throws {TypeError} If `offset` is not a number, is negative, or is not an integer
 *   (as determined by `(offset | 0) === offset`).
 * @throws {RangeError} If `offset` is greater than the length of the source text.
 */
function getLocFromIndex(offset: number): LineColumn {
  const isValidOffset = typeof offset === 'number' && offset >= 0 && (offset | 0) === offset;
  if (!isValidOffset) {
    throw new TypeError('Expected `offset` to be a non-negative integer.');
  }

  // Make sure the `lines` / `lineStartOffsets` tables exist.
  // `initLines` also decodes `sourceText` if that hasn't been done yet.
  if (lines.length === 0) initLines();

  const sourceLength = sourceText.length;
  if (offset > sourceLength) {
    throw new RangeError(
      `Index out of range (requested index ${offset}, but source text has length ${sourceLength}).`,
    );
  }

  // Binary search for the number of line starts which are `<= offset`.
  // That count is the 1-based line number.
  let lower = 0;
  let upper = lineStartOffsets.length;
  while (lower < upper) {
    const middle = (lower + upper) >> 1; // Shift floors the halved sum
    if (lineStartOffsets[middle] <= offset) {
      lower = middle + 1;
    } else {
      upper = middle;
    }
  }

  const lineStart = lineStartOffsets[lower - 1];
  return { line: lower, column: offset - lineStart };
}

/**
 * Translate a `{ line, column }` location into a range index in the source text.
 * @param loc - A line/column location (1-indexed line, 0-indexed column).
 * @returns The range index of the location in the file.
 * @throws {TypeError} If `loc` is not a non-null object whose `line` and `column` are both integers
 *   (as determined by `(n | 0) === n`).
 * @throws {RangeError} If `line` is less than 1 or greater than the number of lines,
 *   if `column` is negative, or if `column` points past the end of the line.
 */
export function getIndexFromLoc(loc: LineColumn): number {
  if (loc === null || typeof loc !== 'object') {
    throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
  }

  const { line, column } = loc;
  if (typeof line !== 'number' || typeof column !== 'number' || (line | 0) !== line || (column | 0) !== column) {
    throw new TypeError('Expected `loc` to be an object with integer `line` and `column` properties.');
  }

  // Make sure the `lines` / `lineStartOffsets` tables exist.
  // `initLines` also decodes `sourceText` if that hasn't been done yet.
  if (lines.length === 0) initLines();

  const lineCount = lineStartOffsets.length;
  if (line < 1 || line > lineCount) {
    throw new RangeError(
      `Line number out of range (line ${line} requested). ` +
        `Line numbers should be 1-based, and less than or equal to number of lines in file (${lineCount}).`,
    );
  }
  if (column < 0) throw new RangeError(`Invalid column number (column ${column} requested).`);

  const lineStart = lineStartOffsets[line - 1];
  const target = lineStart + column;

  // Same end-of-file rule as ESLint's implementation: on the last line, the position
  // immediately after the final character (i.e. the length of the file) is a valid result,
  // so rules don't need to special-case the end of the file. On every other line, the offset
  // must fall before the start of the following line.
  const isLastLine = line === lineCount;
  const lineEnd = isLastLine ? sourceText.length : lineStartOffsets[line];
  const isInRange = isLastLine ? target <= lineEnd : target < lineEnd;
  if (isInRange) return target;

  throw new RangeError(
    `Column number out of range (column ${column} requested, ` +
      `but the length of line ${line} is ${lineEnd - lineStart}).`,
  );
}

/**
* Get all the ancestors of a given node.
* @param node - AST node
Expand Down
Loading