Skip to content

Commit

Permalink
Put escape sequences into separate token
Browse files Browse the repository at this point in the history
  • Loading branch information
rlidwka committed Apr 15, 2022
1 parent 750a954 commit 75037c6
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 31 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [13.0.0] - WIP
### Added
- Added a new token type `text_special` to store escaped characters, same as `text` but
unaffected by replacement plugins (smartquotes, typographer, linkifier, etc.).
- Added a new rule `text_join` in `core` ruler. Text replacement plugins may choose to
insert themselves before it.

### Changed
- `text_collapse` rule is renamed to `fragments_join`.

### Fixed
- Smartquotes, typographic replacements and plain text links can now be escaped
with backslash (e.g. `\(c)` or `google\.com` are no longer replaced).


## [12.3.2] - 2022-01-08
### Security
- Fix possible ReDOS in newline rule. Thanks to @MakeNowJust.
Expand Down Expand Up @@ -592,6 +608,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Renamed presets folder (configs -> presets).


[13.0.0]: https://github.com/markdown-it/markdown-it/compare/12.3.2...13.0.0
[12.3.2]: https://github.com/markdown-it/markdown-it/compare/12.3.1...12.3.2
[12.3.1]: https://github.com/markdown-it/markdown-it/compare/12.3.0...12.3.1
[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0
Expand Down
5 changes: 4 additions & 1 deletion lib/parser_core.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ var _rules = [
[ 'inline', require('./rules_core/inline') ],
[ 'linkify', require('./rules_core/linkify') ],
[ 'replacements', require('./rules_core/replacements') ],
[ 'smartquotes', require('./rules_core/smartquotes') ]
[ 'smartquotes', require('./rules_core/smartquotes') ],
// `text_join` finds `text_special` tokens (for escape sequences)
// and joins them with the rest of the text
[ 'text_join', require('./rules_core/text_join') ]
];


Expand Down
9 changes: 8 additions & 1 deletion lib/parser_inline.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,18 @@ var _rules = [
[ 'entity', require('./rules_inline/entity') ]
];

// `rules2` ruleset was created specifically for emphasis/strikethrough
// post-processing and may be changed in the future.
//
// Don't use this for anything except pairs (plugins working with `balance_pairs`).
//
var _rules2 = [
[ 'balance_pairs', require('./rules_inline/balance_pairs') ],
[ 'strikethrough', require('./rules_inline/strikethrough').postProcess ],
[ 'emphasis', require('./rules_inline/emphasis').postProcess ],
[ 'text_collapse', require('./rules_inline/text_collapse') ]
// rules for pairs separate '**' into their own text tokens, which may be left unused;
// the rule below merges unused segments back with the rest of the text
[ 'fragments_join', require('./rules_inline/fragments_join') ]
];


Expand Down
5 changes: 3 additions & 2 deletions lib/presets/commonmark.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ module.exports = {
rules: [
'normalize',
'block',
'inline'
'inline',
'text_join'
]
},

Expand Down Expand Up @@ -73,7 +74,7 @@ module.exports = {
rules2: [
'balance_pairs',
'emphasis',
'text_collapse'
'fragments_join'
]
}
}
Expand Down
5 changes: 3 additions & 2 deletions lib/presets/zero.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ module.exports = {
rules: [
'normalize',
'block',
'inline'
'inline',
'text_join'
]
},

Expand All @@ -55,7 +56,7 @@ module.exports = {
],
rules2: [
'balance_pairs',
'text_collapse'
'fragments_join'
]
}
}
Expand Down
45 changes: 45 additions & 0 deletions lib/rules_core/text_join.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Join escaped-character tokens (`text_special`) with the rest of the text
//
// This is a separate rule so that plugins get a chance to run text
// replacements after ordinary text has been joined, but before escaped
// characters are merged back into it.
//
// For example, `\:)` shouldn't be replaced with an emoji.
//
'use strict';


module.exports = function text_join(state) {
var j, l, tokens, curr, max, last,
blockTokens = state.tokens;

for (j = 0, l = blockTokens.length; j < l; j++) {
if (blockTokens[j].type !== 'inline') continue;

tokens = blockTokens[j].children;
max = tokens.length;

for (curr = 0; curr < max; curr++) {
if (tokens[curr].type === 'text_special') {
tokens[curr].type = 'text';
}
}

for (curr = last = 0; curr < max; curr++) {
if (tokens[curr].type === 'text' &&
curr + 1 < max &&
tokens[curr + 1].type === 'text') {

// collapse two adjacent text nodes
tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content;
} else {
if (curr !== last) { tokens[last] = tokens[curr]; }

last++;
}
}

if (curr !== last) {
tokens.length = last;
}
}
};
67 changes: 43 additions & 24 deletions lib/rules_inline/escape.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,40 +13,59 @@ for (var i = 0; i < 256; i++) { ESCAPED.push(0); }


module.exports = function escape(state, silent) {
var ch, pos = state.pos, max = state.posMax;

if (state.src.charCodeAt(pos) !== 0x5C/* \ */) { return false; }
var ch1, ch2, origStr, escapedStr, token, pos = state.pos, max = state.posMax;

if (state.src.charCodeAt(pos) !== 0x5C/* \ */) return false;
pos++;

if (pos < max) {
ch = state.src.charCodeAt(pos);
// '\' at the end of the inline block
if (pos >= max) return false;

ch1 = state.src.charCodeAt(pos);

if (ch < 256 && ESCAPED[ch] !== 0) {
if (!silent) { state.pending += state.src[pos]; }
state.pos += 2;
return true;
if (ch1 === 0x0A) {
if (!silent) {
state.push('hardbreak', 'br', 0);
}

if (ch === 0x0A) {
if (!silent) {
state.push('hardbreak', 'br', 0);
}
pos++;
// skip leading whitespaces from next line
while (pos < max) {
ch1 = state.src.charCodeAt(pos);
if (!isSpace(ch1)) break;
pos++;
}

state.pos = pos;
return true;
}

escapedStr = state.src[pos];

if (ch1 >= 0xD800 && ch1 <= 0xDBFF && pos + 1 < max) {
ch2 = state.src.charCodeAt(pos + 1);

if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
escapedStr += state.src[pos + 1];
pos++;
// skip leading whitespaces from next line
while (pos < max) {
ch = state.src.charCodeAt(pos);
if (!isSpace(ch)) { break; }
pos++;
}

state.pos = pos;
return true;
}
}

if (!silent) { state.pending += '\\'; }
state.pos++;
origStr = '\\' + escapedStr;

if (!silent) {
token = state.push('text_special', '', 0);

if (ch1 < 256 && ESCAPED[ch1] !== 0) {
token.content = escapedStr;
} else {
token.content = origStr;
}

token.markup = origStr;
token.info = 'escape';
}

state.pos = pos + 1;
return true;
};
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
'use strict';


module.exports = function text_collapse(state) {
module.exports = function fragments_join(state) {
var curr, last,
level = 0,
tokens = state.tokens,
Expand Down
13 changes: 13 additions & 0 deletions test/fixtures/markdown-it/smartquotes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,16 @@ Should parse quotes adjacent to inline html, #677:
<p>“test <br>”</p>
<p>“<br> test”</p>
.

Should be escapable:
.
"foo"

\"foo"

"foo\"
.
<p>“foo”</p>
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.
17 changes: 17 additions & 0 deletions test/fixtures/markdown-it/typographer.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ dupes
<p>!!! ??? ,</p>
.

copyright should be escapable
.
\(c)
.
<p>(c)</p>
.


dashes
.
Expand All @@ -80,6 +87,16 @@ markdownit--awesome
<p>markdownit–awesome</p>
.

dashes should be escapable
.
foo \-- bar

foo -\- bar
.
<p>foo -- bar</p>
<p>foo -- bar</p>
.

regression tests for #624
.
1---2---3
Expand Down
8 changes: 8 additions & 0 deletions test/misc.js
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ describe('Misc', function () {
md.render('# test\n\n - hello\n - world\n')
);
});

it('Should escape surrogate pairs (coverage)', function () {
  var md = markdownit();

  // [ source, expected HTML ] - in every case the backslash is kept and
  // the characters after it are rendered unchanged.
  var cases = [
    // backslash + complete surrogate pair
    [ '\\\uD835\uDC9C', '<p>\\\uD835\uDC9C</p>\n' ],
    // backslash + high surrogate followed by a non-surrogate character
    [ '\\\uD835x', '<p>\\\uD835x</p>\n' ],
    // backslash + lone high surrogate at the end of input
    [ '\\\uD835', '<p>\\\uD835</p>\n' ]
  ];

  cases.forEach(function (pair) {
    assert.strictEqual(md.render(pair[0]), pair[1]);
  });
});
});


Expand Down

0 comments on commit 75037c6

Please sign in to comment.