Skip to content

Commit

Permalink
Improve emphasis algorithm
Browse files Browse the repository at this point in the history
This fixes quadratic complexity in the `**<...>**a**<...>**`
pathological case.
  • Loading branch information
rlidwka committed Dec 9, 2021
1 parent e07a9dd commit 24abaa5
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 21 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [12.3.0] - WIP
### Changed
- `StateInline.delimiters[].jump` is removed.

### Fixed
- Fixed quadratic complexity in pathological `***<10k stars>***a***<10k stars>***` case.


## [12.2.0] - 2021-08-02
### Added
- Ordered lists: add order value to token info.
Expand Down Expand Up @@ -575,6 +583,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Renamed presets folder (configs -> presets).


[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0
[12.2.0]: https://github.com/markdown-it/markdown-it/compare/12.1.0...12.2.0
[12.1.0]: https://github.com/markdown-it/markdown-it/compare/12.0.6...12.1.0
[12.0.6]: https://github.com/markdown-it/markdown-it/compare/12.0.5...12.0.6
Expand Down
36 changes: 28 additions & 8 deletions lib/rules_inline/balance_pairs.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,28 @@ function processDelimiters(state, delimiters) {
openersBottom = {},
max = delimiters.length;

if (!max) return;

// headerIdx is the index of the first delimiter in the delimiter run that contains the current closer
var headerIdx = 0;
var lastTokenIdx = -2; // needs any value lower than -1
var jumps = [];

for (closerIdx = 0; closerIdx < max; closerIdx++) {
closer = delimiters[closerIdx];

jumps.push(0);

// markers belong to same delimiter run if:
// - they have adjacent tokens
// - AND markers are the same
//
if (delimiters[headerIdx].marker !== closer.marker || lastTokenIdx !== closer.token - 1) {
headerIdx = closerIdx;
}

lastTokenIdx = closer.token;

// Length is only used for emphasis-specific "rule of 3",
// if it's not defined (in strikethrough or 3rd party plugins),
// we can default it to 0 to disable those checks.
Expand All @@ -30,14 +49,11 @@ function processDelimiters(state, delimiters) {

minOpenerIdx = openersBottom[closer.marker][(closer.open ? 3 : 0) + (closer.length % 3)];

openerIdx = closerIdx - closer.jump - 1;

// avoid crash if `closer.jump` is pointing outside of the array, see #742
if (openerIdx < -1) openerIdx = -1;
openerIdx = headerIdx - jumps[headerIdx] - 1;

newMinOpenerIdx = openerIdx;

for (; openerIdx > minOpenerIdx; openerIdx -= opener.jump + 1) {
for (; openerIdx > minOpenerIdx; openerIdx -= jumps[openerIdx] + 1) {
opener = delimiters[openerIdx];

if (opener.marker !== closer.marker) continue;
Expand Down Expand Up @@ -67,15 +83,19 @@ function processDelimiters(state, delimiters) {
// sure algorithm has linear complexity (see *_*_*_*_*_... case).
//
lastJump = openerIdx > 0 && !delimiters[openerIdx - 1].open ?
delimiters[openerIdx - 1].jump + 1 :
jumps[openerIdx - 1] + 1 :
0;

closer.jump = closerIdx - openerIdx + lastJump;
jumps[closerIdx] = closerIdx - openerIdx + lastJump;
jumps[openerIdx] = lastJump;

closer.open = false;
opener.end = closerIdx;
opener.jump = lastJump;
opener.close = false;
newMinOpenerIdx = -1;
// treat next token as start of run,
// it optimizes skips in **<...>**a**<...>** pathological case
lastTokenIdx = -2;
break;
}
}
Expand Down
15 changes: 4 additions & 11 deletions lib/rules_inline/emphasis.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,6 @@ module.exports.tokenize = function emphasis(state, silent) {
//
length: scanned.length,

// An amount of characters before this one that's equivalent to
// current one. In plain English: if this delimiter does not open
// an emphasis, neither do previous `jump` characters.
//
// Used to skip sequences like "*****" in one step, for 1st asterisk
// value will be 0, for 2nd it's 1 and so on.
//
jump: i,

// A position of the token this delimiter corresponds to.
//
token: state.tokens.length - 1,
Expand Down Expand Up @@ -91,9 +82,11 @@ function postProcess(state, delimiters) {
//
isStrong = i > 0 &&
delimiters[i - 1].end === startDelim.end + 1 &&
// check that the first two markers match and are adjacent
delimiters[i - 1].marker === startDelim.marker &&
delimiters[i - 1].token === startDelim.token - 1 &&
delimiters[startDelim.end + 1].token === endDelim.token + 1 &&
delimiters[i - 1].marker === startDelim.marker;
// check that last two markers are adjacent (we can safely assume they match)
delimiters[startDelim.end + 1].token === endDelim.token + 1;

ch = String.fromCharCode(startDelim.marker);

Expand Down
1 change: 0 additions & 1 deletion lib/rules_inline/strikethrough.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ module.exports.tokenize = function strikethrough(state, silent) {
state.delimiters.push({
marker: marker,
length: 0, // disable "rule of 3" length checks meant for emphasis
jump: i / 2, // for `~~` 1 marker = 2 characters
token: state.tokens.length - 1,
end: -1,
open: scanned.can_open,
Expand Down
4 changes: 4 additions & 0 deletions test/pathological.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ describe('Pathological sequences speed', () => {
);
});

it('nested inlines', async () => {
await test_pattern('*'.repeat(60000) + 'a' + '*'.repeat(60000));
});

it('nested strong emph', async () => {
await test_pattern('*a **a '.repeat(5000) + 'b' + ' a** a*'.repeat(5000));
});
Expand Down
2 changes: 1 addition & 1 deletion test/pathological.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{ "md5": "10730e0b6dec7355412bb632e9eb1d98" }
{ "md5": "c417101e12950cc61ff0a6d2cebb80e0" }

0 comments on commit 24abaa5

Please sign in to comment.