diff --git a/CHANGELOG.md b/CHANGELOG.md index 95f4ab967..2ed5c40c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Smartquotes, typographic replacements and plain text links can now be escaped with backslash (e.g. `\(c)` or `google\.com` are no longer replaced). +- Fixed collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz` + is now a single link, not emphasized). Emails and fuzzy links are not affected by this. ## [12.3.2] - 2022-01-08 diff --git a/lib/parser_inline.js b/lib/parser_inline.js index 49fea64c5..5f384a196 100644 --- a/lib/parser_inline.js +++ b/lib/parser_inline.js @@ -14,6 +14,7 @@ var Ruler = require('./ruler'); var _rules = [ [ 'text', require('./rules_inline/text') ], + [ 'linkify', require('./rules_inline/linkify') ], [ 'newline', require('./rules_inline/newline') ], [ 'escape', require('./rules_inline/escape') ], [ 'backticks', require('./rules_inline/backticks') ], diff --git a/lib/rules_core/linkify.js b/lib/rules_core/linkify.js index 7c3ffc865..11294a594 100644 --- a/lib/rules_core/linkify.js +++ b/lib/rules_core/linkify.js @@ -69,8 +69,17 @@ module.exports = function linkify(state) { level = currentToken.level; lastPos = 0; - for (ln = 0; ln < links.length; ln++) { + // forbid escape sequence at the start of the string, + // this avoids http\://example.com/ from being linkified as + // http://example.com/ + if (links.length > 0 && + links[0].index === 0 && + i > 0 && + tokens[i - 1].type === 'text_special') { + links = links.slice(1); + } + for (ln = 0; ln < links.length; ln++) { url = links[ln].url; fullUrl = state.md.normalizeLink(url); if (!state.md.validateLink(fullUrl)) { continue; } diff --git a/lib/rules_inline/html_inline.js b/lib/rules_inline/html_inline.js index 28c798055..da319b722 100644 --- a/lib/rules_inline/html_inline.js +++ b/lib/rules_inline/html_inline.js @@ -6,6 +6,14 @@ var HTML_TAG_RE = 
require('../common/html_re').HTML_TAG_RE; +function isLinkOpen(str) { + return /^<a[>\s]/i.test(str); +} +function isLinkClose(str) { + return /^<\/a\s*>/i.test(str); +} + + function isLetter(ch) { /*eslint no-bitwise:0*/ var lc = ch | 0x20; // to lower case @@ -41,6 +49,9 @@ module.exports = function html_inline(state, silent) { if (!silent) { token = state.push('html_inline', '', 0); token.content = state.src.slice(pos, pos + match[0].length); + + if (isLinkOpen(token.content)) state.linkLevel++; + if (isLinkClose(token.content)) state.linkLevel--; } state.pos += match[0].length; return true; diff --git a/lib/rules_inline/link.js b/lib/rules_inline/link.js index 1d242bfe3..fec5acb39 100644 --- a/lib/rules_inline/link.js +++ b/lib/rules_inline/link.js @@ -137,7 +137,9 @@ module.exports = function link(state, silent) { attrs.push([ 'title', title ]); } + state.linkLevel++; state.md.inline.tokenize(state); + state.linkLevel--; token = state.push('link_close', 'a', -1); } diff --git a/lib/rules_inline/linkify.js b/lib/rules_inline/linkify.js new file mode 100644 index 000000000..53e10bfe1 --- /dev/null +++ b/lib/rules_inline/linkify.js @@ -0,0 +1,58 @@ +// Process links like https://example.org/ + +'use strict'; + + +// RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) +var SCHEME_RE = /(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$/i; + + +module.exports = function linkify(state, silent) { + var pos, max, match, proto, link, url, fullUrl, token; + + if (!state.md.options.linkify) return false; + if (state.linkLevel > 0) return false; + + pos = state.pos; + max = state.posMax; + + if (pos + 3 > max) return false; + if (state.src.charCodeAt(pos) !== 0x3A/* : */) return false; + if (state.src.charCodeAt(pos + 1) !== 0x2F/* / */) return false; + if (state.src.charCodeAt(pos + 2) !== 0x2F/* / */) return false; + + match = state.pending.match(SCHEME_RE); + if (!match) return false; + + proto = match[1]; + + link = state.md.linkify.matchAtStart(state.src.slice(pos - proto.length)); + if (!link) return false; + + url = link.url; + + // disallow '*' at the end of the link (conflicts with emphasis) + url = url.replace(/\*+$/, ''); + + fullUrl = state.md.normalizeLink(url); + if (!state.md.validateLink(fullUrl)) return false; + + if (!silent) { + state.pending = state.pending.slice(0, -proto.length); + + token = state.push('link_open', 'a', 1); + token.attrs = [ [ 'href', fullUrl ] ]; + token.markup = 'linkify'; + token.info = 'auto'; + + token = state.push('text', '', 0); + token.content = state.md.normalizeLinkText(url); + + token = state.push('link_close', 'a', -1); + token.markup = 'linkify'; + token.info = 'auto'; + } + + state.pos += url.length - proto.length; + return true; +}; diff --git a/lib/rules_inline/state_inline.js b/lib/rules_inline/state_inline.js index efbf9bd88..5d41acd5a 100644 --- a/lib/rules_inline/state_inline.js +++ b/lib/rules_inline/state_inline.js @@ -35,6 +35,10 @@ function StateInline(src, md, env, outTokens) { // backtick length => last seen position this.backticks = {}; this.backticksScanned = false; + + // Counter used to disable inline linkify-it execution + // inside <a> and markdown links + this.linkLevel = 0; } diff --git a/package.json b/package.json index 0829bbecd..1877ce00a 100644 --- a/package.json +++ 
b/package.json @@ -39,7 +39,7 @@ "dependencies": { "argparse": "^2.0.1", "entities": "~3.0.1", - "linkify-it": "^3.0.1", + "linkify-it": "markdown-it/linkify-it", "mdurl": "^1.0.1", "uc.micro": "^1.0.5" }, diff --git a/test/fixtures/markdown-it/linkify.txt b/test/fixtures/markdown-it/linkify.txt index 1e86cac96..5721b876c 100644 --- a/test/fixtures/markdown-it/linkify.txt +++ b/test/fixtures/markdown-it/linkify.txt @@ -30,6 +30,86 @@ don't touch text in html tags . +entities inside raw links +. +https://example.com/foo&bar +. +

https://example.com/foo&amp;bar

+. + + +emphasis inside raw links (asterisk, can happen in links with params) +. +https://example.com/foo*bar*baz +. +

https://example.com/foo*bar*baz

+. + + +emphasis inside raw links (underscore) +. +http://example.org/foo._bar_-_baz +. +

http://example.org/foo._bar_-_baz

+. + + +backticks inside raw links +. +https://example.com/foo`bar`baz +. +

https://example.com/foo`bar`baz

+. + + +links inside raw links +. +https://example.com/foo[123](456)bar +. +

https://example.com/foo[123](456)bar

+. + + +escapes not allowed at the start +. +\https://example.com +. +

\https://example.com

+. + + +escapes not allowed at colon +. +https\://example.com +. +

https://example.com

+. + + +escapes not allowed at slashes +. +https:\//aa.org https://bb.org +. +

https://aa.org https://bb.org

+. + + +fuzzy link shouldn't match cc.org +. +https:/\/cc.org +. +

https://cc.org

+. + + +bold links (exclude markup of pairs from link tail) +. +**http://example.com/foobar** +. +

http://example.com/foobar

+. + + match links without protocol . www.example.org @@ -55,3 +135,35 @@ http://example.com/(c) .

http://example.com/(c)

. + + +coverage, prefix not valid +. +http:/example.com/ +. +

http:/example.com/

+. + + +coverage, negative link level +. +[https://example.com](https://example.com) +. +

https://example.com

+. + + +emphasis with '*', real link: +. +http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B +. +

http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B

+. + + +emphasis with '_', real link: +. +https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf +. +

https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf

+.