From e160e5794c7b8d67603716fa26802a543e9096b9 Mon Sep 17 00:00:00 2001 From: Bryan Ross Date: Thu, 7 Apr 2022 22:01:23 -0600 Subject: [PATCH 1/4] fix: non leading-tabs in markdown content (#1559) Only replaces tabs at the beginning of a block construct. Tabs in the middle of the item are unaffected. All tests passing. Tabs in both GFM and CommonMark at 100% fixes #1559 --- src/Lexer.js | 12 ++++++++++-- src/Tokenizer.js | 2 +- src/rules.js | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index eb182bd4bc..c2ffd90ac6 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -115,8 +115,7 @@ export class Lexer { */ lex(src) { src = src - .replace(/\r\n|\r/g, '\n') - .replace(/\t/g, ' '); + .replace(/\r\n|\r/g, '\n'); this.blockTokens(src, this.tokens); @@ -135,6 +134,15 @@ export class Lexer { if (this.options.pedantic) { src = src.replace(/^ +$/gm, ''); } + + if (this.options.gfm || this.options.pedantic) { + src = src.replace(/\t/g, ' '); + } else { + src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { + return leading + ' '.repeat(tabs.length); + }); + } + let token, lastToken, cutSrc, lastParagraphClipped; while (src) { diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 2efb3b7ea7..92e8ee110b 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -187,7 +187,7 @@ export class Tokenizer { } // Get next list item - const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`); + const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`); // Check if current bullet point can start a new List Item while (src) { diff --git a/src/rules.js b/src/rules.js index 58b917d2b2..25d1415301 100644 --- a/src/rules.js +++ b/src/rules.js @@ -11,10 +11,10 @@ export const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/, - hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, + hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, - list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/, + list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) From 1b23ef8661bc889550cf4e8c98502f1bd48ef7ef Mon Sep 17 00:00:00 2001 From: Bryan Ross Date: Thu, 7 Apr 2022 23:27:32 -0600 Subject: [PATCH 2/4] update new/html_comments.html to preserve tab --- src/Lexer.js | 2 +- test/specs/new/html_comments.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index c2ffd90ac6..c4abddec1b 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -135,7 +135,7 @@ export class Lexer { src = src.replace(/^ +$/gm, ''); } - if (this.options.gfm || this.options.pedantic) { + if (this.options.pedantic) { src = src.replace(/\t/g, ' '); } else { src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { diff --git a/test/specs/new/html_comments.html b/test/specs/new/html_comments.html index 745d823b7e..a1c1f1b7d3 100644 --- a/test/specs/new/html_comments.html +++ b/test/specs/new/html_comments.html @@ -37,7 +37,7 @@

Example 9

Example 10

From 60abe6e0836823515d04c73531e36a17ee87e594 Mon Sep 17 00:00:00 2001 From: Bryan Ross Date: Sat, 9 Apr 2022 10:40:13 -0600 Subject: [PATCH 3/4] combine redundant if condition --- src/Lexer.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index c4abddec1b..3c0a2c7e01 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -132,11 +132,7 @@ export class Lexer { */ blockTokens(src, tokens = []) { if (this.options.pedantic) { - src = src.replace(/^ +$/gm, ''); - } - - if (this.options.pedantic) { - src = src.replace(/\t/g, ' '); + src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); } else { src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { return leading + ' '.repeat(tabs.length); From 79bef12800a4da9132821948638ac085bb7160e4 Mon Sep 17 00:00:00 2001 From: Bryan Ross Date: Sun, 10 Apr 2022 11:28:42 -0600 Subject: [PATCH 4/4] add test for tab immediately after blockquote character --- src/Tokenizer.js | 2 +- test/specs/new/tab_after_blockquote.html | 1 + test/specs/new/tab_after_blockquote.md | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 test/specs/new/tab_after_blockquote.html create mode 100644 test/specs/new/tab_after_blockquote.md diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 92e8ee110b..0eec752cc3 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -151,7 +151,7 @@ export class Tokenizer { blockquote(src) { const cap = this.rules.block.blockquote.exec(src); if (cap) { - const text = cap[0].replace(/^ *> ?/gm, ''); + const text = cap[0].replace(/^ *>[ \t]?/gm, ''); return { type: 'blockquote', diff --git a/test/specs/new/tab_after_blockquote.html b/test/specs/new/tab_after_blockquote.html new file mode 100644 index 0000000000..73aab0bf78 --- /dev/null +++ b/test/specs/new/tab_after_blockquote.html @@ -0,0 +1 @@ +

test

\ No newline at end of file diff --git a/test/specs/new/tab_after_blockquote.md b/test/specs/new/tab_after_blockquote.md new file mode 100644 index 0000000000..4371a12db9 --- /dev/null +++ b/test/specs/new/tab_after_blockquote.md @@ -0,0 +1 @@ +> test \ No newline at end of file