diff --git a/CHANGELOG.md b/CHANGELOG.md index 83ad0cb94..10e4da2ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [12.3.0] - WIP +### Changed +- `StateInline.delimiters[].jump` is removed. + +### Fixed +- Fixed quadratic complexity in pathological `***<10k stars>***a***<10k stars>***` case. + + ## [12.2.0] - 2021-08-02 ### Added - Ordered lists: add order value to token info. @@ -575,6 +583,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Renamed presets folder (configs -> presets). +[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0 [12.2.0]: https://github.com/markdown-it/markdown-it/compare/12.1.0...12.2.0 [12.1.0]: https://github.com/markdown-it/markdown-it/compare/12.0.6...12.1.0 [12.0.6]: https://github.com/markdown-it/markdown-it/compare/12.0.5...12.0.6 diff --git a/lib/rules_inline/balance_pairs.js b/lib/rules_inline/balance_pairs.js index c6ee5a78d..4faad9068 100644 --- a/lib/rules_inline/balance_pairs.js +++ b/lib/rules_inline/balance_pairs.js @@ -9,9 +9,28 @@ function processDelimiters(state, delimiters) { openersBottom = {}, max = delimiters.length; + if (!max) return; + + // headerIdx is the first delimiter of the current (where closer is) delimiter run + var headerIdx = 0; + var lastTokenIdx = -2; // needs any value lower than -1 + var jumps = []; + for (closerIdx = 0; closerIdx < max; closerIdx++) { closer = delimiters[closerIdx]; + jumps.push(0); + + // markers belong to same delimiter run if: + // - they have adjacent tokens + // - AND markers are the same + // + if (delimiters[headerIdx].marker !== closer.marker || lastTokenIdx !== closer.token - 1) { + headerIdx = closerIdx; + } + + lastTokenIdx = closer.token; + // Length is only used for emphasis-specific "rule of 3", // if it's not defined (in 
strikethrough or 3rd party plugins), // we can default it to 0 to disable those checks. @@ -30,14 +49,11 @@ function processDelimiters(state, delimiters) { minOpenerIdx = openersBottom[closer.marker][(closer.open ? 3 : 0) + (closer.length % 3)]; - openerIdx = closerIdx - closer.jump - 1; - - // avoid crash if `closer.jump` is pointing outside of the array, see #742 - if (openerIdx < -1) openerIdx = -1; + openerIdx = headerIdx - jumps[headerIdx] - 1; newMinOpenerIdx = openerIdx; - for (; openerIdx > minOpenerIdx; openerIdx -= opener.jump + 1) { + for (; openerIdx > minOpenerIdx; openerIdx -= jumps[openerIdx] + 1) { opener = delimiters[openerIdx]; if (opener.marker !== closer.marker) continue; @@ -67,15 +83,19 @@ function processDelimiters(state, delimiters) { // sure algorithm has linear complexity (see *_*_*_*_*_... case). // lastJump = openerIdx > 0 && !delimiters[openerIdx - 1].open ? - delimiters[openerIdx - 1].jump + 1 : + jumps[openerIdx - 1] + 1 : 0; - closer.jump = closerIdx - openerIdx + lastJump; + jumps[closerIdx] = closerIdx - openerIdx + lastJump; + jumps[openerIdx] = lastJump; + closer.open = false; opener.end = closerIdx; - opener.jump = lastJump; opener.close = false; newMinOpenerIdx = -1; + // treat next token as start of run, + // it optimizes skips in **<...>**a**<...>** pathological case + lastTokenIdx = -2; break; } } diff --git a/lib/rules_inline/emphasis.js b/lib/rules_inline/emphasis.js index c140d2c10..7e8ab4cd7 100644 --- a/lib/rules_inline/emphasis.js +++ b/lib/rules_inline/emphasis.js @@ -29,15 +29,6 @@ module.exports.tokenize = function emphasis(state, silent) { // length: scanned.length, - // An amount of characters before this one that's equivalent to - // current one. In plain English: if this delimiter does not open - // an emphasis, neither do previous `jump` characters. - // - // Used to skip sequences like "*****" in one step, for 1st asterisk - // value will be 0, for 2nd it's 1 and so on. 
- // - jump: i, - // A position of the token this delimiter corresponds to. // token: state.tokens.length - 1, @@ -91,9 +82,11 @@ function postProcess(state, delimiters) { // isStrong = i > 0 && delimiters[i - 1].end === startDelim.end + 1 && + // check that first two markers match and are adjacent + delimiters[i - 1].marker === startDelim.marker && delimiters[i - 1].token === startDelim.token - 1 && - delimiters[startDelim.end + 1].token === endDelim.token + 1 && - delimiters[i - 1].marker === startDelim.marker; + // check that last two markers are adjacent (we can safely assume they match) + delimiters[startDelim.end + 1].token === endDelim.token + 1; ch = String.fromCharCode(startDelim.marker); diff --git a/lib/rules_inline/strikethrough.js b/lib/rules_inline/strikethrough.js index cb8944fc3..3c35adf5f 100644 --- a/lib/rules_inline/strikethrough.js +++ b/lib/rules_inline/strikethrough.js @@ -33,7 +33,6 @@ module.exports.tokenize = function strikethrough(state, silent) { state.delimiters.push({ marker: marker, length: 0, // disable "rule of 3" length checks meant for emphasis - jump: i / 2, // for `~~` 1 marker = 2 characters token: state.tokens.length - 1, end: -1, open: scanned.can_open, diff --git a/test/pathological.js b/test/pathological.js index f8c69047e..8481efd85 100644 --- a/test/pathological.js +++ b/test/pathological.js @@ -57,6 +57,10 @@ describe('Pathological sequences speed', () => { ); }); + it('nested inlines', async () => { + await test_pattern('*'.repeat(60000) + 'a' + '*'.repeat(60000)); + }); + it('nested strong emph', async () => { await test_pattern('*a **a '.repeat(5000) + 'b' + ' a** a*'.repeat(5000)); }); diff --git a/test/pathological.json b/test/pathological.json index 7339947ad..4aeb6306f 100644 --- a/test/pathological.json +++ b/test/pathological.json @@ -1 +1 @@ -{ "md5": "10730e0b6dec7355412bb632e9eb1d98" } +{ "md5": "c417101e12950cc61ff0a6d2cebb80e0" }