Skip to content

Commit

Permalink
Introduce option lazyCompileMin
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Feb 2, 2025
1 parent f0a7385 commit 8529eb5
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 25 deletions.
31 changes: 22 additions & 9 deletions spec/toregexpdetails.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,6 @@ import {toRegExpDetails} from '../dist/esm/index.js';
import {r} from '../src/utils.js';

describe('toRegExpDetails', () => {
it('should return an object with pattern and flags properties', () => {
expect(Object.keys(toRegExpDetails(''))).toEqual(['pattern', 'flags']);
});

it('should return an object with pattern, flags, and options properties when the pattern uses subclass-based emulation', () => {
expect(Object.keys(toRegExpDetails('a++'))).toEqual(['pattern', 'flags', 'options']);
expect(Object.keys(toRegExpDetails(r`(^|\G)`))).toEqual(['pattern', 'flags', 'options']);
});

it('should throw for non-string patterns', () => {
expect(() => toRegExpDetails()).toThrow();
for (const value of [undefined, null, 0, false, [], {}, /(?:)/]) {
Expand All @@ -19,6 +10,28 @@ describe('toRegExpDetails', () => {
});

it('should return an empty pattern if given an empty string', () => {
// Not `(?:)` like `new RegExp('').source`
expect(toRegExpDetails('').pattern).toBe('');
});

describe('result properties', () => {
const props = ['pattern', 'flags'];
const extProps = ['pattern', 'flags', 'options'];

it('should return an object with pattern and flags properties', () => {
expect(Object.keys(toRegExpDetails('a'))).toEqual(props);
});

it('should include an options property when the pattern uses subclass-based emulation', () => {
expect(Object.keys(toRegExpDetails('a++'))).toEqual(extProps);
expect(Object.keys(toRegExpDetails(r`(^|\G)a`))).toEqual(extProps);
expect(Object.keys(toRegExpDetails(r`(?<n>a)\g<n>`))).toEqual(extProps);
expect(Object.keys(toRegExpDetails(r`(?<n>a)\g<n>`, {avoidSubclass: true}))).toEqual(props);
});

it('should include an options property when the pattern uses lazy compilation', () => {
expect(Object.keys(toRegExpDetails('a', {lazyCompileMin: 0}))).toEqual(extProps);
expect(Object.keys(toRegExpDetails('a', {lazyCompileMin: Infinity}))).toEqual(props);
});
});
});
25 changes: 15 additions & 10 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {transform} from './transform.js';
import {generate} from './generate.js';
import {Accuracy, getOptions, Target} from './options.js';
import {parse} from './parse.js';
import {EmulatedRegExp} from './subclass.js';
import {EmulatedRegExp, LazyRegExp} from './subclass.js';
import {tokenize} from './tokenize.js';
import {atomic, possessive} from 'regex/internals';
import {recursion} from 'regex-recursion';
Expand Down Expand Up @@ -44,6 +44,7 @@ function toOnigurumaAst(pattern, options) {
flags?: string;
global?: boolean;
hasIndices?: boolean;
lazyCompileMin?: number;
rules?: {
allowOrphanBackrefs?: boolean;
asciiWordBoundaries?: boolean;
Expand All @@ -60,14 +61,15 @@ function toOnigurumaAst(pattern, options) {
Accepts an Oniguruma pattern and returns an equivalent JavaScript `RegExp`.
@param {string} pattern Oniguruma regex pattern.
@param {ToRegExpOptions} [options]
@returns {RegExp | EmulatedRegExp}
@returns {RegExp | EmulatedRegExp | LazyRegExp}
*/
function toRegExp(pattern, options) {
const result = toRegExpDetails(pattern, options);
if (result.options) {
return new EmulatedRegExp(result.pattern, result.flags, result.options);
const d = toRegExpDetails(pattern, options);
const ctor = d.options?.lazyCompile ? LazyRegExp : (d.options ? EmulatedRegExp : RegExp);
if (ctor === RegExp) {
return new RegExp(d.pattern, d.flags);
}
return new RegExp(result.pattern, result.flags);
return new ctor(d.pattern, d.flags, d.options);
}

/**
Expand Down Expand Up @@ -107,7 +109,7 @@ function toRegExpDetails(pattern, options) {
captureTransfers: recursionResult.captureTransfers,
hiddenCaptures: recursionResult.hiddenCaptures,
});
const result = {
const details = {
pattern: atomicResult.pattern,
flags: `${opts.hasIndices ? 'd' : ''}${opts.global ? 'g' : ''}${generated.flags}${generated.options.disable.v ? 'u' : 'v'}`,
};
Expand All @@ -117,15 +119,17 @@ function toRegExpDetails(pattern, options) {
// Change the map to the `EmulatedRegExp` format, serializable as JSON
const transfers = Array.from(atomicResult.captureTransfers);
const strategy = regexAst._strategy;
if (hiddenCaptures.length || transfers.length || strategy) {
result.options = {
const lazyCompile = details.pattern.length >= opts.lazyCompileMin;
if (hiddenCaptures.length || transfers.length || strategy || lazyCompile) {
details.options = {
...(hiddenCaptures.length && {hiddenCaptures}),
...(transfers.length && {transfers}),
...(strategy && {strategy}),
...(lazyCompile && {lazyCompile}),
};
}
}
return result;
return details;
}

// // Returns a Regex+ AST generated from an Oniguruma pattern
Expand All @@ -135,6 +139,7 @@ function toRegExpDetails(pattern, options) {

export {
EmulatedRegExp,
LazyRegExp,
toOnigurumaAst,
toRegExp,
toRegExpDetails,
Expand Down
2 changes: 2 additions & 0 deletions src/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ function getOptions(options) {
global: false,
// Include JavaScript flag `d` (`hasIndices`) in the result.
hasIndices: false,
// Pattern length threshold for delaying regex construction until first use.
lazyCompileMin: Infinity,
// JavaScript version used for generated regexes. Using `auto` detects the best value based on
// your environment. Later targets allow faster processing, simpler generated source, and
// support for additional features.
Expand Down
43 changes: 37 additions & 6 deletions src/subclass.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {getOrCreate, throwIfNot} from './utils.js';
/**
@typedef {{
hiddenCaptures?: Array<number>;
lazyCompile?: boolean;
strategy?: string | null;
transfers?: Array<[number, Array<number>]>;
}} EmulatedRegExpOptions
Expand Down Expand Up @@ -64,6 +65,7 @@ class EmulatedRegExp extends RegExp {
}
} else {
super(pattern, flags);
this.rawOptions = options ?? {};
const opts = {
hiddenCaptures: [],
strategy: null,
Expand All @@ -72,11 +74,6 @@ class EmulatedRegExp extends RegExp {
};
this.#captureMap = createCaptureMap(opts.hiddenCaptures, opts.transfers);
this.#strategy = opts.strategy;
this.rawOptions = {
...(opts.hiddenCaptures.length && {hiddenCaptures: opts.hiddenCaptures}),
...(opts.strategy && {strategy: opts.strategy}),
...(opts.transfers.length && {transfers: opts.transfers}),
};
}
}

Expand Down Expand Up @@ -166,6 +163,39 @@ class EmulatedRegExp extends RegExp {
}
}

/**
A `RegExp` subclass that postpones building the real regex until `exec` is first called. The
native regex created by the constructor has an empty pattern (only `flags` are validated up front);
the stored pattern is compiled into an `EmulatedRegExp` on first use and then reused.
*/
class LazyRegExp extends RegExp {
  // The pattern and flags to compile on first use
  _pattern;
  _flags;
  // The compiled regex; stays `undefined` until `exec` is first called
  _regexp;
  rawOptions = {};

  // The native `source` would report the empty placeholder pattern, so expose the stored one
  get source() {
    return this._pattern;
  }

  /**
  @param {string | RegExp} pattern Pattern to compile on first use. If given a regex (which is what
    the built-in handling of string methods `matchAll` and `split` passes when it copies a regex
    via the species constructor), its `source`, `flags`, and `rawOptions` are used for any values
    not explicitly provided.
  @param {string} [flags]
  @param {EmulatedRegExpOptions} [options]
  */
  constructor(pattern, flags, options) {
    let source = pattern;
    let resolvedFlags = flags;
    let resolvedOptions = options;
    if (pattern instanceof RegExp) {
      // Copying: read the values through the public accessors so that a `LazyRegExp` yields its
      // stored pattern rather than the empty placeholder held by its native internals
      source = pattern.source;
      resolvedFlags = flags ?? pattern.flags;
      resolvedOptions = options ?? pattern.rawOptions;
    }
    super('', resolvedFlags);
    this._pattern = source;
    this._flags = resolvedFlags;
    this.rawOptions = resolvedOptions ?? {};
  }

  /**
  Compiles the stored pattern if that hasn't happened yet, then runs the match with the compiled
  regex, keeping `lastIndex` in sync in both directions.
  @param {string} str
  @returns {RegExpExecArray | null}
  */
  exec(str) {
    if (!this._regexp) {
      this._regexp = new EmulatedRegExp(this._pattern, this._flags, this.rawOptions);
    }
    // Callers read and write `lastIndex` on this object, so hand it to the compiled regex before
    // matching and copy the updated value back afterward
    this._regexp.lastIndex = this.lastIndex;
    const match = this._regexp.exec(str);
    this.lastIndex = this._regexp.lastIndex;
    return match;
  }
}

function adjustMatchDetailsForOffset(match, re, input, offset) {
match.input = input;
match.index += offset;
Expand Down Expand Up @@ -227,7 +257,7 @@ function createNameMap(pattern) {
let numCharClassesOpen = 0;
let numCaptures = 0;
let match;
while (match = re.exec(pattern)) {
while ((match = re.exec(pattern))) {
const {0: m, groups: {capture, name}} = match;
// Relies on no unescaped literal `[` in char classes (valid in JS if not using flag v), but
// this library's generator never produces unescaped literal `[` even with `target` ES2018 (see
Expand All @@ -250,4 +280,5 @@ function createNameMap(pattern) {

export {
EmulatedRegExp,
LazyRegExp,
};

0 comments on commit 8529eb5

Please sign in to comment.