From e160e5794c7b8d67603716fa26802a543e9096b9 Mon Sep 17 00:00:00 2001
From: Bryan Ross <git@rossipedia.com>
Date: Thu, 7 Apr 2022 22:01:23 -0600
Subject: [PATCH 1/4] fix: non-leading tabs in markdown content (#1559)

Only replaces tabs at the beginning of a block construct; each leading
tab becomes four spaces. Tabs in the middle of the item are unaffected.

All tests are passing. Tabs in both GFM and CommonMark are at 100%.

fixes #1559
---
 src/Lexer.js     | 12 ++++++++++--
 src/Tokenizer.js |  2 +-
 src/rules.js     |  4 ++--
 3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/src/Lexer.js b/src/Lexer.js
index eb182bd4bc..c2ffd90ac6 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -115,8 +115,7 @@ export class Lexer {
    */
   lex(src) {
     src = src
-      .replace(/\r\n|\r/g, '\n')
-      .replace(/\t/g, '    ');
+      .replace(/\r\n|\r/g, '\n');
 
     this.blockTokens(src, this.tokens);
 
@@ -135,6 +134,15 @@ export class Lexer {
     if (this.options.pedantic) {
       src = src.replace(/^ +$/gm, '');
     }
+
+    if (this.options.gfm || this.options.pedantic) {
+      src = src.replace(/\t/g, '    ');
+    } else {
+      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
+        return leading + '    '.repeat(tabs.length);
+      });
+    }
+
     let token, lastToken, cutSrc, lastParagraphClipped;
 
     while (src) {
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 2efb3b7ea7..92e8ee110b 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -187,7 +187,7 @@ export class Tokenizer {
       }
 
       // Get next list item
-      const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`);
+      const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`);
 
       // Check if current bullet point can start a new List Item
       while (src) {
diff --git a/src/rules.js b/src/rules.js
index 58b917d2b2..25d1415301 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -11,10 +11,10 @@ export const block = {
   newline: /^(?: *(?:\n|$))+/,
   code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
   fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
-  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
+  hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
   heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
-  list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
+  list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
   html: '^ {0,3}(?:' // optional indentation
     + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
     + '|comment[^\\n]*(\\n+|$)' // (2)

From 1b23ef8661bc889550cf4e8c98502f1bd48ef7ef Mon Sep 17 00:00:00 2001
From: Bryan Ross <git@rossipedia.com>
Date: Thu, 7 Apr 2022 23:27:32 -0600
Subject: [PATCH 2/4] update new/html_comments.html to preserve tab

---
 src/Lexer.js                      | 2 +-
 test/specs/new/html_comments.html | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Lexer.js b/src/Lexer.js
index c2ffd90ac6..c4abddec1b 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -135,7 +135,7 @@ export class Lexer {
       src = src.replace(/^ +$/gm, '');
     }
 
-    if (this.options.gfm || this.options.pedantic) {
+    if (this.options.pedantic) {
       src = src.replace(/\t/g, '    ');
     } else {
       src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
diff --git a/test/specs/new/html_comments.html b/test/specs/new/html_comments.html
index 745d823b7e..a1c1f1b7d3 100644
--- a/test/specs/new/html_comments.html
+++ b/test/specs/new/html_comments.html
@@ -37,7 +37,7 @@ <h3 id="example-9">Example 9</h3>
 <h3 id="example-10">Example 10</h3>
 
 <!-- multi
-line    
+line	
 comment
 -->
 

From 60abe6e0836823515d04c73531e36a17ee87e594 Mon Sep 17 00:00:00 2001
From: Bryan Ross <bryan.ross@docker.com>
Date: Sat, 9 Apr 2022 10:40:13 -0600
Subject: [PATCH 3/4] combine redundant if condition

---
 src/Lexer.js | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/Lexer.js b/src/Lexer.js
index c4abddec1b..3c0a2c7e01 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -132,11 +132,7 @@ export class Lexer {
    */
   blockTokens(src, tokens = []) {
     if (this.options.pedantic) {
-      src = src.replace(/^ +$/gm, '');
-    }
-
-    if (this.options.pedantic) {
-      src = src.replace(/\t/g, '    ');
+      src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
     } else {
       src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
         return leading + '    '.repeat(tabs.length);

From 79bef12800a4da9132821948638ac085bb7160e4 Mon Sep 17 00:00:00 2001
From: Bryan Ross <git@rossipedia.com>
Date: Sun, 10 Apr 2022 11:28:42 -0600
Subject: [PATCH 4/4] add test for tab immediately after blockquote character

---
 src/Tokenizer.js                         | 2 +-
 test/specs/new/tab_after_blockquote.html | 1 +
 test/specs/new/tab_after_blockquote.md   | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 test/specs/new/tab_after_blockquote.html
 create mode 100644 test/specs/new/tab_after_blockquote.md

diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 92e8ee110b..0eec752cc3 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -151,7 +151,7 @@ export class Tokenizer {
   blockquote(src) {
     const cap = this.rules.block.blockquote.exec(src);
     if (cap) {
-      const text = cap[0].replace(/^ *> ?/gm, '');
+      const text = cap[0].replace(/^ *>[ \t]?/gm, '');
 
       return {
         type: 'blockquote',
diff --git a/test/specs/new/tab_after_blockquote.html b/test/specs/new/tab_after_blockquote.html
new file mode 100644
index 0000000000..73aab0bf78
--- /dev/null
+++ b/test/specs/new/tab_after_blockquote.html
@@ -0,0 +1 @@
+<blockquote><p>test</p></blockquote>
\ No newline at end of file
diff --git a/test/specs/new/tab_after_blockquote.md b/test/specs/new/tab_after_blockquote.md
new file mode 100644
index 0000000000..4371a12db9
--- /dev/null
+++ b/test/specs/new/tab_after_blockquote.md
@@ -0,0 +1 @@
+>	test
\ No newline at end of file