From 701a86dfd998e68b615cd7f265eff88cec839d0c Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sat, 20 Apr 2024 17:45:50 -0600 Subject: [PATCH 1/4] fix: fix code continuation in blockquote --- src/Lexer.ts | 9 ++- src/Tokenizer.ts | 73 +++++++++++++++++++--- test/specs/commonmark/commonmark.0.31.json | 6 +- test/specs/gfm/commonmark.0.31.json | 6 +- test/unit/marked.test.js | 3 +- 5 files changed, 74 insertions(+), 23 deletions(-) diff --git a/src/Lexer.ts b/src/Lexer.ts index d6be1afd3c..dc2cea1231 100644 --- a/src/Lexer.ts +++ b/src/Lexer.ts @@ -101,9 +101,9 @@ export class _Lexer { /** * Lexing */ - blockTokens(src: string, tokens?: Token[]): Token[]; - blockTokens(src: string, tokens?: TokensList): TokensList; - blockTokens(src: string, tokens: Token[] = []) { + blockTokens(src: string, tokens?: Token[], lastParagraphClipped?: boolean): Token[]; + blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList; + blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) { if (this.options.pedantic) { src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); } else { @@ -115,7 +115,6 @@ export class _Lexer { let token: Tokens.Generic | undefined; let lastToken; let cutSrc; - let lastParagraphClipped; while (src) { if (this.options.extensions @@ -249,7 +248,7 @@ export class _Lexer { } if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) { lastToken = tokens[tokens.length - 1]; - if (lastParagraphClipped && lastToken.type === 'paragraph') { + if (lastParagraphClipped && lastToken?.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; this.inlineQueue.pop(); diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 10b31454e2..77ed0b5d27 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -7,7 +7,7 @@ import { } from './helpers.ts'; import type { Rules } from './rules.ts'; import type { _Lexer } from './Lexer.ts'; -import type { Links, Tokens } from './Tokens.ts'; +import type { Links, Tokens, Token } from './Tokens.ts'; import type { MarkedOptions } from './MarkedOptions.ts'; function outputLink(cap: string[], link: Pick, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image { @@ -156,16 +156,71 @@ export class _Tokenizer { blockquote(src: string): Tokens.Blockquote | undefined { const cap = this.rules.block.blockquote.exec(src); if (cap) { - // precede setext continuation with 4 spaces so it isn't a setext - let text = cap[0].replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1'); - text = rtrim(text.replace(/^ *>[ \t]?/gm, ''), '\n'); - const top = this.lexer.state.top; - this.lexer.state.top = true; - const tokens = this.lexer.blockTokens(text); - this.lexer.state.top = top; + let lines = rtrim(cap[0], '\n').split('\n'); + let raw = ''; + let text = ''; + const tokens: Token[] = []; + + while (lines.length > 0) { + let inBlockquote = false; + const currentLines = []; + + while (lines.length > 0) { + if (/^ {0,3}>/.test(lines[0])) { + currentLines.push(lines.shift()); + inBlockquote = true; + } else if (!inBlockquote) { + currentLines.push(lines.shift()); + } else { + break; + } + } + + const currentRaw = currentLines.join('\n'); + const currentText = currentRaw + // precede setext continuation with 4 spaces so it isn't a setext + .replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1') + .replace(/^ {0,3}>[ \t]?/gm, ''); + raw = raw ? `${raw}\n${currentRaw}` : currentRaw; + text = text ? `${text}\n${currentText}` : currentText; + const top = this.lexer.state.top; + this.lexer.state.top = true; + this.lexer.blockTokens(currentText, tokens, true); + this.lexer.state.top = top; + + if (lines.length === 0) { + break; + } + + const lastToken = tokens[tokens.length - 1]; + + if (lastToken?.type === 'code') { + break; + } else if (lastToken?.type === 'blockquote') { + const oldBlockquoteToken = lastToken as Tokens.Blockquote; + const newText = oldBlockquoteToken.raw + '\n' + lines.join('\n'); + const newBlockquoteToken = this.blockquote(newText)!; + tokens[tokens.length - 1] = newBlockquoteToken; + + raw = raw.substring(0, raw.length - oldBlockquoteToken.raw.length) + newBlockquoteToken.raw; + text = text.substring(0, text.length - oldBlockquoteToken.text.length) + newBlockquoteToken.text; + break; + } else if (lastToken?.type === 'list') { + const oldListToken = lastToken as Tokens.List; + const newText = oldListToken.raw + '\n' + lines.join('\n'); + const newListToken = this.list(newText)!; + tokens[tokens.length - 1] = newListToken; + + raw = raw.substring(0, raw.length - lastToken.raw.length) + newListToken.raw; + text = text.substring(0, text.length - oldListToken.raw.length) + newListToken.raw; + lines = newText.substring(tokens[tokens.length - 1].raw.length).split('\n'); + continue; + } + } + return { type: 'blockquote', - raw: cap[0], + raw, tokens, text }; diff --git a/test/specs/commonmark/commonmark.0.31.json b/test/specs/commonmark/commonmark.0.31.json index f9b5db2e61..cd0d9991e0 100644 --- a/test/specs/commonmark/commonmark.0.31.json +++ b/test/specs/commonmark/commonmark.0.31.json @@ -1887,8 +1887,7 @@ "example": 236, "start_line": 3838, "end_line": 3848, - "section": "Block quotes", - "shouldFail": true + "section": "Block quotes" }, { "markdown": "> ```\nfoo\n```\n", @@ -1896,8 +1895,7 @@ "example": 237, "start_line": 3851, "end_line": 3861, - "section": "Block quotes", - "shouldFail": true + "section": "Block quotes" }, { "markdown": "> foo\n - bar\n", diff --git a/test/specs/gfm/commonmark.0.31.json b/test/specs/gfm/commonmark.0.31.json index ff36b9f4d8..8cef7c132f 100644 --- a/test/specs/gfm/commonmark.0.31.json +++ b/test/specs/gfm/commonmark.0.31.json @@ -1887,8 +1887,7 @@ "example": 236, "start_line": 3838, "end_line": 3848, - "section": "Block quotes", - "shouldFail": true + "section": "Block quotes" }, { "markdown": "> ```\nfoo\n```\n", @@ -1896,8 +1895,7 @@ "example": 237, "start_line": 3851, "end_line": 3861, - "section": "Block quotes", - "shouldFail": true + "section": "Block quotes" }, { "markdown": "> foo\n - bar\n", diff --git a/test/unit/marked.test.js b/test/unit/marked.test.js index c9c0925a12..f885908bb9 100644 --- a/test/unit/marked.test.js +++ b/test/unit/marked.test.js @@ -18,7 +18,7 @@ describe('marked unit', () => { assert.strictEqual(tokens[0].type, 'paragraph'); assert.strictEqual(tokens[2].tokens[0].type, 'paragraph'); - assert.strictEqual(tokens[3].items[0].tokens[0].type, 'text'); + assert.strictEqual(tokens[4].items[0].tokens[0].type, 'text'); }); }); @@ -924,6 +924,7 @@ br ['blockquote', '> blockquote'], ['paragraph', 'blockquote'], ['text', 'blockquote'], + ['space', ''], ['list', '- list'], ['list_item', '- list'], ['text', 'list'], From ce4f6968d935bdc6d9ccc06a0a8add08da05a93b Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sat, 20 Apr 2024 22:39:37 -0600 Subject: [PATCH 2/4] fix space after hr --- src/Tokenizer.ts | 2 +- test/unit/marked.test.js | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 77ed0b5d27..2b1dd44fe9 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -148,7 +148,7 @@ export class _Tokenizer { if (cap) { return { type: 'hr', - raw: cap[0] + raw: rtrim(cap[0], '\n') }; } } diff --git a/test/unit/marked.test.js b/test/unit/marked.test.js index f885908bb9..fee341557b 100644 --- a/test/unit/marked.test.js +++ b/test/unit/marked.test.js @@ -910,6 +910,7 @@ br ['text', 'paragraph'], ['space', ''], ['hr', '---'], + ['space', ''], ['heading', '# heading'], ['text', 'heading'], ['code', '```code```'], From 2f58989b42cf93c10b741aebd0b9e08cbbae351e Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sun, 21 Apr 2024 22:12:21 -0600 Subject: [PATCH 3/4] comment code --- src/Tokenizer.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 2b1dd44fe9..2bdd9fa0cf 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -166,6 +166,7 @@ export class _Tokenizer { const currentLines = []; while (lines.length > 0) { + // get lines up to a continuation if (/^ {0,3}>/.test(lines[0])) { currentLines.push(lines.shift()); inBlockquote = true; @@ -178,16 +179,20 @@ export class _Tokenizer { const currentRaw = currentLines.join('\n'); const currentText = currentRaw - // precede setext continuation with 4 spaces so it isn't a setext + // precede setext continuation with 4 spaces so it isn't a setext .replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1') .replace(/^ {0,3}>[ \t]?/gm, ''); raw = raw ? `${raw}\n${currentRaw}` : currentRaw; text = text ? `${text}\n${currentText}` : currentText; + + // parse blockquote lines as top level tokens + // merge paragraphs if this is a continuation const top = this.lexer.state.top; this.lexer.state.top = true; this.lexer.blockTokens(currentText, tokens, true); this.lexer.state.top = top; + // if there is no continuation then we are done if (lines.length === 0) { break; } @@ -195,8 +200,10 @@ export class _Tokenizer { const lastToken = tokens[tokens.length - 1]; if (lastToken?.type === 'code') { + // blockquote continuation cannot be preceded by a code block break; } else if (lastToken?.type === 'blockquote') { + // include continuation in nested blockquote const oldBlockquoteToken = lastToken as Tokens.Blockquote; const newText = oldBlockquoteToken.raw + '\n' + lines.join('\n'); const newBlockquoteToken = this.blockquote(newText)!; @@ -206,6 +213,7 @@ export class _Tokenizer { text = text.substring(0, text.length - oldBlockquoteToken.text.length) + newBlockquoteToken.text; break; } else if (lastToken?.type === 'list') { + // include continuation in nested list const oldListToken = lastToken as Tokens.List; const newText = oldListToken.raw + '\n' + lines.join('\n'); const newListToken = this.list(newText)!; From 738b877adf8f1adc6586573bc94ae6cda2db10a6 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 26 Apr 2024 09:41:46 -0600 Subject: [PATCH 4/4] fixes --- src/Tokenizer.ts | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 2bdd9fa0cf..63156d7e82 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -165,17 +165,19 @@ export class _Tokenizer { let inBlockquote = false; const currentLines = []; - while (lines.length > 0) { + let i; + for (i = 0; i < lines.length; i++) { // get lines up to a continuation - if (/^ {0,3}>/.test(lines[0])) { - currentLines.push(lines.shift()); + if (/^ {0,3}>/.test(lines[i])) { + currentLines.push(lines[i]); inBlockquote = true; } else if (!inBlockquote) { - currentLines.push(lines.shift()); + currentLines.push(lines[i]); } else { break; } } + lines = lines.slice(i); const currentRaw = currentLines.join('\n'); const currentText = currentRaw @@ -204,23 +206,23 @@ export class _Tokenizer { break; } else if (lastToken?.type === 'blockquote') { // include continuation in nested blockquote - const oldBlockquoteToken = lastToken as Tokens.Blockquote; - const newText = oldBlockquoteToken.raw + '\n' + lines.join('\n'); - const newBlockquoteToken = this.blockquote(newText)!; - tokens[tokens.length - 1] = newBlockquoteToken; + const oldToken = lastToken as Tokens.Blockquote; + const newText = oldToken.raw + '\n' + lines.join('\n'); + const newToken = this.blockquote(newText)!; + tokens[tokens.length - 1] = newToken; - raw = raw.substring(0, raw.length - oldBlockquoteToken.raw.length) + newBlockquoteToken.raw; - text = text.substring(0, text.length - oldBlockquoteToken.text.length) + newBlockquoteToken.text; + raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw; + text = text.substring(0, text.length - oldToken.text.length) + newToken.text; break; } else if (lastToken?.type === 'list') { // include continuation in nested list - const oldListToken = lastToken as Tokens.List; - const newText = oldListToken.raw + '\n' + lines.join('\n'); - const newListToken = this.list(newText)!; - tokens[tokens.length - 1] = newListToken; + const oldToken = lastToken as Tokens.List; + const newText = oldToken.raw + '\n' + lines.join('\n'); + const newToken = this.list(newText)!; + tokens[tokens.length - 1] = newToken; - raw = raw.substring(0, raw.length - lastToken.raw.length) + newListToken.raw; - text = text.substring(0, text.length - oldListToken.raw.length) + newListToken.raw; + raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw; + text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw; lines = newText.substring(tokens[tokens.length - 1].raw.length).split('\n'); continue; }