Skip to content

Commit

Permalink
Fix capture transfer edges
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Jan 30, 2025
1 parent 0281e56 commit bb0d966
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 93 deletions.
131 changes: 76 additions & 55 deletions spec/recursion-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ describe('recursion', () => {
});
});

it('should transfer to capture preceding recursion', () => {
it('should transfer to capture that precedes the recursion', () => {
expect(recursion(r`()(()(a)()\g<2&R=2>?b)`, {
captureTransfers: new Map([[1, 4]]),
hiddenCaptures: [4],
Expand All @@ -188,7 +188,7 @@ describe('recursion', () => {
})).toEqual({
pattern: '()(a(?:a(?:)?()(b)())?()(b)())',
captureTransfers: new Map([[1, 7]]),
hiddenCaptures: [7, 3, 4, 5],
hiddenCaptures: [7, 3, 4, 5], // unsorted
});
});

Expand All @@ -203,30 +203,51 @@ describe('recursion', () => {
});

// Verifies how `captureTransfers` and `hiddenCaptures` are renumbered when one pattern contains
// several recursion tokens (`\g<…&R=N>`), each of which is expected to expand into nested
// `(?:…)` groups around an empty `(?:)`.
it('should transfer across multiple recursions', () => {
// Capture in left contents of recursions
// (the `(a)` group precedes each `\g<…>` token, so its copy appears before the empty `(?:)`)
expect(recursion(r`(?<r>(a)\g<r&R=2>?b) ((a)\g<3&R=2>?b)`, {
captureTransfers: new Map([[1, 3], ['r', 3], [2, 4]]),
})).toEqual({
pattern: '(?<r>(a)(?:(a)(?:)?b)?b) ((a)(?:(a)(?:)?b)?b)',
captureTransfers: new Map([[1, 4], ['r', 4], [2, 6]]),
hiddenCaptures: [3, 6],
});
// Capture in right contents of recursions
// (the `(b)` group follows each `\g<…>` token, so its copy appears after the empty `(?:)`)
expect(recursion(r`(?<r>a\g<r&R=2>?(b)) (a\g<3&R=2>?(b))`, {
captureTransfers: new Map([[1, 3], ['r', 3], [2, 4]]),
})).toEqual({
pattern: '(?<r>a(?:a(?:)?(b))?(b)) (a(?:a(?:)?(b))?(b))',
captureTransfers: new Map([[1, 4], ['r', 4], [3, 6]]),
hiddenCaptures: [2, 5],
});
// Capture in left and right contents of recursions
// (combines the two cases above: `(a)` before and `(b)` after each token)
expect(recursion(r`(?<r>(a)\g<r&R=2>?(b)) ((a)\g<4&R=2>?(b))`, {
captureTransfers: new Map([[1, 4], ['r', 4], [2, 5], [3, 6]]),
})).toEqual({
pattern: '(?<r>(a)(?:(a)(?:)?(b))?(b)) ((a)(?:(a)(?:)?(b))?(b))',
captureTransfers: new Map([[1, 6], ['r', 6], [2, 8], [5, 10]]),
hiddenCaptures: [3, 4, 8, 9],
});
// Triple recursion with capture transfer to middle (Oniguruma: `\g<a> (?<a>a\g<b>?b) (?<b>c\g<a>?d)`)
// Unlike the cases above, the input already carries `hiddenCaptures`; the expected output array
// is written in non-ascending order (hence "unsorted").
expect(recursion(r`(a(c\g<1&R=2>?d)?b) (?<a>a(c\g<3&R=2>?d)?b) (?<b>c(a\g<5&R=2>?b)?d)`, {
captureTransfers: new Map([[3, 6], ['a', 6]]),
hiddenCaptures: [1, 2, 4, 6],
})).toEqual({
pattern: '(a(c(?:a(c(?:)?d)?b)?d)?b) (?<a>a(c(?:a(c(?:)?d)?b)?d)?b) (?<b>c(a(?:c(a(?:)?b)?d)?b)?d)',
captureTransfers: new Map([[4, 9],['a', 9]]),
hiddenCaptures: [1, 2, 5, 8, 3, 6, 9], // unsorted
});
// Same as above but with depth 3
// (`R=3` in place of `R=2`; each expected expansion has one more nested `(?:…)?` level)
expect(recursion(r`(a(c\g<1&R=3>?d)?b) (?<a>a(c\g<3&R=3>?d)?b) (?<b>c(a\g<5&R=3>?b)?d)`, {
captureTransfers: new Map([[3, 6], ['a', 6]]),
hiddenCaptures: [1, 2, 4, 6],
})).toEqual({
pattern: '(a(c(?:a(c(?:a(c(?:)?d)?b)?d)?b)?d)?b) (?<a>a(c(?:a(c(?:a(c(?:)?d)?b)?d)?b)?d)?b) (?<b>c(a(?:c(a(?:c(a(?:)?b)?d)?b)?d)?b)?d)',
captureTransfers: new Map([[5, 12],['a', 12]]),
hiddenCaptures: [1, 2, 6, 10, 3, 4, 7, 8, 11, 12], // unsorted
});
});

it('should transfer for captures after recursion', () => {
it('should transfer between captures following recursion', () => {
expect(recursion(r`((2)\g<1&R=2>?) (3) (4)`, {
captureTransfers: new Map([[3, 4]]),
})).toEqual({
Expand All @@ -237,64 +258,64 @@ describe('recursion', () => {
});
});
});
});

describe('readme examples', () => {
it('should match an equal number of two different subpatterns', () => {
const re = regex({plugins: [recursion]})`a(?R=20)?b`;
expect(re.exec('test aaaaaabbb')[0]).toBe('aaabbb');
});
describe('readme examples', () => {
it('should match an equal number of two different subpatterns', () => {
const re = regex({plugins: [recursion]})`a(?R=20)?b`;
expect(re.exec('test aaaaaabbb')[0]).toBe('aaabbb');
});

it('should match an equal number of two different subpatterns, as the entire string', () => {
const re = regex({plugins: [recursion]})`
^ (?<r> a \g<r&R=20>? b) $
`;
expect(re.test('aaabbb')).toBeTrue();
expect(re.test('aaabb')).toBeFalse();
});
it('should match an equal number of two different subpatterns, as the entire string', () => {
const re = regex({plugins: [recursion]})`
^ (?<r> a \g<r&R=20>? b) $
`;
expect(re.test('aaabbb')).toBeTrue();
expect(re.test('aaabb')).toBeFalse();
});

it('should match balanced parentheses', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ([^\(\)] | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});
it('should match balanced parentheses', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ([^\(\)] | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});

it('should match balanced parentheses using an atomic group', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ((?> [^\(\)]+) | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});
it('should match balanced parentheses using an atomic group', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ((?> [^\(\)]+) | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});

it('should match balanced parentheses using a possessive quantifier', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ([^\(\)]++ | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});
it('should match balanced parentheses using a possessive quantifier', () => {
const parens = regex({flags: 'g', plugins: [recursion]})`
\( ([^\(\)]++ | (?R=20))* \)
`;
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
});

it('should match palindromes', () => {
const palindromes = regex({flags: 'gi', plugins: [recursion]})`
(?<char> \w)
# Recurse, or match a lone unbalanced char in the middle
((?R=15) | \w?)
\k<char>
`;
expect('Racecar, ABBA, and redivided'.match(palindromes)).toEqual(['Racecar', 'ABBA', 'edivide']);
});
it('should match palindromes', () => {
const palindromes = regex({flags: 'gi', plugins: [recursion]})`
(?<char> \w)
# Recurse, or match a lone unbalanced char in the middle
((?R=15) | \w?)
\k<char>
`;
expect('Racecar, ABBA, and redivided'.match(palindromes)).toEqual(['Racecar', 'ABBA', 'edivide']);
});

it('should match palindromes as complete words', () => {
const palindromeWords = regex({flags: 'gi', plugins: [recursion]})`
\b
(?<palindrome>
(?<char> \w )
# Recurse, or match a lone unbalanced char in the center
( \g<palindrome&R=15> | \w? )
\k<char>
)
\b
`;
expect('Racecar, ABBA, and redivided'.match(palindromeWords)).toEqual(['Racecar', 'ABBA']);
});
it('should match palindromes as complete words', () => {
const palindromeWords = regex({flags: 'gi', plugins: [recursion]})`
\b
(?<palindrome>
(?<char> \w )
# Recurse, or match a lone unbalanced char in the center
( \g<palindrome&R=15> | \w? )
\k<char>
)
\b
`;
expect('Racecar, ABBA, and redivided'.match(palindromeWords)).toEqual(['Racecar', 'ABBA']);
});
});
84 changes: 46 additions & 38 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,16 @@ function recursion(pattern, data) {
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with global recursion`
);
}
const pre = pattern.slice(0, match.index);
const post = pattern.slice(token.lastIndex);
if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
const left = pattern.slice(0, match.index);
const right = pattern.slice(token.lastIndex);
if (hasUnescaped(right, recursiveToken, Context.DEFAULT)) {
throw new Error(overlappingRecursionMsg);
}
const reps = +rDepth - 1;
pattern = makeRecursive(
pre,
post,
+rDepth,
left,
right,
reps,
false,
hiddenCaptures,
addedHiddenCaptures,
Expand All @@ -96,7 +97,8 @@ function recursion(pattern, data) {
captureTransfers = mapCaptureTransfers(
captureTransfers,
numCapturesPassed,
pre,
left,
reps,
addedHiddenCaptures.length,
0
);
Expand Down Expand Up @@ -132,13 +134,14 @@ function recursion(pattern, data) {
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with recursion of capturing groups`
);
}
const groupContentsPre = pattern.slice(startPos, match.index);
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
const groupContentsLeft = pattern.slice(startPos, match.index);
const groupContentsRight = groupContents.slice(groupContentsLeft.length + m.length);
const numAddedHiddenCapturesPreExpansion = addedHiddenCaptures.length;
const reps = +gRDepth - 1;
const expansion = makeRecursive(
groupContentsPre,
groupContentsPost,
+gRDepth,
groupContentsLeft,
groupContentsRight,
reps,
true,
hiddenCaptures,
addedHiddenCaptures,
Expand All @@ -147,7 +150,8 @@ function recursion(pattern, data) {
captureTransfers = mapCaptureTransfers(
captureTransfers,
numCapturesPassed,
groupContentsPre,
groupContentsLeft,
reps,
addedHiddenCaptures.length - numAddedHiddenCapturesPreExpansion,
numAddedHiddenCapturesPreExpansion
);
Expand All @@ -156,7 +160,7 @@ function recursion(pattern, data) {
// Modify the string we're looping over
pattern = `${pre}${expansion}${post}`;
// Step forward for the next loop iteration
token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
token.lastIndex += expansion.length - m.length - groupContentsLeft.length - groupContentsRight.length;
openGroups.forEach(g => g.hasRecursedWithin = true);
hasRecursed = true;
} else if (captureName) {
Expand Down Expand Up @@ -207,19 +211,19 @@ function assertMaxInBounds(max) {
}

/**
@param {string} pre
@param {string} post
@param {number} maxDepth
@param {string} left
@param {string} right
@param {number} reps
@param {boolean} isSubpattern
@param {Array<number>} hiddenCaptures
@param {Array<number>} addedHiddenCaptures
@param {number} numCapturesPassed
@returns {string}
*/
function makeRecursive(
pre,
post,
maxDepth,
left,
right,
reps,
isSubpattern,
hiddenCaptures,
addedHiddenCaptures,
Expand All @@ -228,25 +232,25 @@ function makeRecursive(
const namesInRecursed = new Set();
// Can skip this work if not needed
if (isSubpattern) {
forEachUnescaped(pre + post, namedCaptureDelim, ({groups: {captureName}}) => {
forEachUnescaped(left + right, namedCaptureDelim, ({groups: {captureName}}) => {
namesInRecursed.add(captureName);
}, Context.DEFAULT);
}
const rest = [
maxDepth - 1, // reps
isSubpattern ? namesInRecursed : null, // namesInRecursed
reps,
isSubpattern ? namesInRecursed : null,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed,
];
// Depth 2: 'pre(?:pre(?:)post)post'
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post'
// Depth 2: 'left(?:left(?:)right)right'
// Depth 3: 'left(?:left(?:left(?:)right)right)right'
// Empty group in the middle separates tokens and absorbs a following quantifier if present
return `${pre}${
repeatWithDepth(`(?:${pre}`, 'forward', ...rest)
return `${left}${
repeatWithDepth(`(?:${left}`, 'forward', ...rest)
}(?:)${
repeatWithDepth(`${post})`, 'backward', ...rest)
}${post}`;
repeatWithDepth(`${right})`, 'backward', ...rest)
}${right}`;
}

/**
Expand Down Expand Up @@ -312,28 +316,32 @@ function incrementIfAtLeast(arr, threshold) {
/**
@param {Map<number | string, number>} captureTransfers
@param {number} numCapturesPassed
@param {string} leftContents
@param {string} left
@param {number} reps
@param {number} numCapturesAddedInExpansion
@param {number} numAddedHiddenCapturesPreExpansion
@returns {Map<number | string, number>}
*/
function mapCaptureTransfers(captureTransfers, numCapturesPassed, leftContents, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion) {
function mapCaptureTransfers(captureTransfers, numCapturesPassed, left, reps, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion) {
if (captureTransfers.size && numCapturesAddedInExpansion) {
let numCapturesInLeftContents = 0;
forEachUnescaped(leftContents, captureDelim, () => numCapturesInLeftContents++, Context.DEFAULT);
const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeftContents + numAddedHiddenCapturesPreExpansion;
let numCapturesInLeft = 0;
forEachUnescaped(left, captureDelim, () => numCapturesInLeft++, Context.DEFAULT);
const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeft + numAddedHiddenCapturesPreExpansion; // 0 for global
const newCaptureTransfers = new Map();
captureTransfers.forEach((/** @type {number} */ from, /** @type {number | string} */ to) => {
// `to` can be a group number or name
if (to > (numCapturesPassed + numAddedHiddenCapturesPreExpansion)) {
to += numCapturesAddedInExpansion;
}
if (from > recursionDelimCaptureNum) {
from += (
// if capture is on left side of expanded group
from <= (recursionDelimCaptureNum + numCapturesInLeftContents) ?
numCapturesInLeftContents :
from <= (recursionDelimCaptureNum + numCapturesInLeft) ?
numCapturesInLeft * reps :
numCapturesAddedInExpansion
);
}
// `to` can be a group number or name
newCaptureTransfers.set((to > numCapturesPassed ? to + numCapturesAddedInExpansion : to), from);
newCaptureTransfers.set(to, from);
});
return newCaptureTransfers;
}
Expand Down

0 comments on commit bb0d966

Please sign in to comment.