From 7ab818502ed537ab8eed22dc9cf4f7602232aa53 Mon Sep 17 00:00:00 2001
From: Tony Brix
Date: Wed, 12 Jun 2024 00:53:43 -0500
Subject: [PATCH] fix: blockquote code continuation (#3264)

BREAKING CHANGE: add space token after blockquote and hr if there are multiple newlines
---
 src/Lexer.ts                               |  9 +--
 src/Tokenizer.ts                           | 85 +++++++++++++++++++---
 test/specs/commonmark/commonmark.0.31.json |  6 +-
 test/specs/gfm/commonmark.0.31.json        |  6 +-
 test/unit/marked.test.js                   |  4 +-
 5 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/src/Lexer.ts b/src/Lexer.ts
index d6be1afd3c..dc2cea1231 100644
--- a/src/Lexer.ts
+++ b/src/Lexer.ts
@@ -101,9 +101,9 @@ export class _Lexer {
   /**
    * Lexing
    */
-  blockTokens(src: string, tokens?: Token[]): Token[];
-  blockTokens(src: string, tokens?: TokensList): TokensList;
-  blockTokens(src: string, tokens: Token[] = []) {
+  blockTokens(src: string, tokens?: Token[], lastParagraphClipped?: boolean): Token[];
+  blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
+  blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
     if (this.options.pedantic) {
       src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
     } else {
@@ -115,7 +115,6 @@ export class _Lexer {
     let token: Tokens.Generic | undefined;
     let lastToken;
     let cutSrc;
-    let lastParagraphClipped;
 
     while (src) {
       if (this.options.extensions
@@ -249,7 +248,7 @@ export class _Lexer {
       }
       if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
         lastToken = tokens[tokens.length - 1];
-        if (lastParagraphClipped && lastToken.type === 'paragraph') {
+        if (lastParagraphClipped && lastToken?.type === 'paragraph') {
           lastToken.raw += '\n' + token.raw;
           lastToken.text += '\n' + token.text;
           this.inlineQueue.pop();
diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 10b31454e2..63156d7e82 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -7,7 +7,7 @@ import {
 } from './helpers.ts';
 import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
-import type { Links, Tokens } from './Tokens.ts';
+import type { Links, Tokens, Token } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
 
 function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image {
@@ -148,7 +148,7 @@
     if (cap) {
       return {
         type: 'hr',
-        raw: cap[0]
+        raw: rtrim(cap[0], '\n')
       };
     }
   }
@@ -156,16 +156,81 @@
   blockquote(src: string): Tokens.Blockquote | undefined {
     const cap = this.rules.block.blockquote.exec(src);
     if (cap) {
-      // precede setext continuation with 4 spaces so it isn't a setext
-      let text = cap[0].replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n    $1');
-      text = rtrim(text.replace(/^ *>[ \t]?/gm, ''), '\n');
-      const top = this.lexer.state.top;
-      this.lexer.state.top = true;
-      const tokens = this.lexer.blockTokens(text);
-      this.lexer.state.top = top;
+      let lines = rtrim(cap[0], '\n').split('\n');
+      let raw = '';
+      let text = '';
+      const tokens: Token[] = [];
+
+      while (lines.length > 0) {
+        let inBlockquote = false;
+        const currentLines = [];
+
+        let i;
+        for (i = 0; i < lines.length; i++) {
+          // get lines up to a continuation
+          if (/^ {0,3}>/.test(lines[i])) {
+            currentLines.push(lines[i]);
+            inBlockquote = true;
+          } else if (!inBlockquote) {
+            currentLines.push(lines[i]);
+          } else {
+            break;
+          }
+        }
+        lines = lines.slice(i);
+
+        const currentRaw = currentLines.join('\n');
+        const currentText = currentRaw
+          // precede setext continuation with 4 spaces so it isn't a setext
+          .replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n    $1')
+          .replace(/^ {0,3}>[ \t]?/gm, '');
+        raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
+        text = text ? `${text}\n${currentText}` : currentText;
+
+        // parse blockquote lines as top level tokens
+        // merge paragraphs if this is a continuation
+        const top = this.lexer.state.top;
+        this.lexer.state.top = true;
+        this.lexer.blockTokens(currentText, tokens, true);
+        this.lexer.state.top = top;
+
+        // if there is no continuation then we are done
+        if (lines.length === 0) {
+          break;
+        }
+
+        const lastToken = tokens[tokens.length - 1];
+
+        if (lastToken?.type === 'code') {
+          // blockquote continuation cannot be preceded by a code block
+          break;
+        } else if (lastToken?.type === 'blockquote') {
+          // include continuation in nested blockquote
+          const oldToken = lastToken as Tokens.Blockquote;
+          const newText = oldToken.raw + '\n' + lines.join('\n');
+          const newToken = this.blockquote(newText)!;
+          tokens[tokens.length - 1] = newToken;
+
+          raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw;
+          text = text.substring(0, text.length - oldToken.text.length) + newToken.text;
+          break;
+        } else if (lastToken?.type === 'list') {
+          // include continuation in nested list
+          const oldToken = lastToken as Tokens.List;
+          const newText = oldToken.raw + '\n' + lines.join('\n');
+          const newToken = this.list(newText)!;
+          tokens[tokens.length - 1] = newToken;
+
+          raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw;
+          text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw;
+          lines = newText.substring(tokens[tokens.length - 1].raw.length).split('\n');
+          continue;
+        }
+      }
+
       return {
         type: 'blockquote',
-        raw: cap[0],
+        raw,
         tokens,
         text
       };
diff --git a/test/specs/commonmark/commonmark.0.31.json b/test/specs/commonmark/commonmark.0.31.json
index f9b5db2e61..cd0d9991e0 100644
--- a/test/specs/commonmark/commonmark.0.31.json
+++ b/test/specs/commonmark/commonmark.0.31.json
@@ -1887,8 +1887,7 @@
     "example": 236,
     "start_line": 3838,
     "end_line": 3848,
-    "section": "Block quotes",
-    "shouldFail": true
+    "section": "Block quotes"
   },
   {
     "markdown": "> ```\nfoo\n```\n",
@@ -1896,8 +1895,7 @@
     "example": 237,
     "start_line": 3851,
     "end_line": 3861,
-    "section": "Block quotes",
-    "shouldFail": true
+    "section": "Block quotes"
   },
   {
     "markdown": "> foo\n    - bar\n",
diff --git a/test/specs/gfm/commonmark.0.31.json b/test/specs/gfm/commonmark.0.31.json
index ff36b9f4d8..8cef7c132f 100644
--- a/test/specs/gfm/commonmark.0.31.json
+++ b/test/specs/gfm/commonmark.0.31.json
@@ -1887,8 +1887,7 @@
     "example": 236,
     "start_line": 3838,
     "end_line": 3848,
-    "section": "Block quotes",
-    "shouldFail": true
+    "section": "Block quotes"
   },
   {
     "markdown": "> ```\nfoo\n```\n",
@@ -1896,8 +1895,7 @@
     "example": 237,
     "start_line": 3851,
     "end_line": 3861,
-    "section": "Block quotes",
-    "shouldFail": true
+    "section": "Block quotes"
   },
   {
     "markdown": "> foo\n    - bar\n",
diff --git a/test/unit/marked.test.js b/test/unit/marked.test.js
index c9c0925a12..fee341557b 100644
--- a/test/unit/marked.test.js
+++ b/test/unit/marked.test.js
@@ -18,7 +18,7 @@ describe('marked unit', () => {
 
       assert.strictEqual(tokens[0].type, 'paragraph');
       assert.strictEqual(tokens[2].tokens[0].type, 'paragraph');
-      assert.strictEqual(tokens[3].items[0].tokens[0].type, 'text');
+      assert.strictEqual(tokens[4].items[0].tokens[0].type, 'text');
     });
   });
 
@@ -910,6 +910,7 @@ br
         ['text', 'paragraph'],
         ['space', ''],
         ['hr', '---'],
+        ['space', ''],
         ['heading', '# heading'],
         ['text', 'heading'],
         ['code', '```code```'],
@@ -924,6 +925,7 @@ br
         ['blockquote', '> blockquote'],
         ['paragraph', 'blockquote'],
         ['text', 'blockquote'],
+        ['space', ''],
         ['list', '- list'],
         ['list_item', '- list'],
         ['text', 'list'],
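
Note (not part of the patch): a quick way to see both the fix and the breaking change is to compare lexer output with and without this change applied. The sketch below is illustrative only; it assumes marked's public `marked.lexer()` API and reduces tokens to their `type` for brevity, and the expected arrays are inferred from the tests above rather than copied from them.

import { marked } from 'marked';

// Breaking change: blank lines after an hr (or a blockquote) are no longer
// folded into that token's raw text, so they surface as their own 'space' token.
console.log(marked.lexer('---\n\n# heading\n').map(t => t.type));
// before this patch: [ 'hr', 'heading' ]
// after this patch:  [ 'hr', 'space', 'heading' ]

// Fix: a fenced code block is no longer pulled into the blockquote as a lazy
// continuation (CommonMark examples 236-237, un-marked as shouldFail above).
console.log(marked.lexer('> foo\n```\nbar\n```\n').map(t => t.type));
// expected after this patch, per CommonMark example 236: [ 'blockquote', 'code' ]

The extra 'space' tokens are also why the walkTokens expectations above gain ['space', ''] entries and why the list in the first unit test moves from tokens[3] to tokens[4]: the blockquote's raw no longer swallows the blank line that follows it.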