From a9696e28989c0bea2077885bab1844525e18a031 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Thu, 6 Jan 2022 09:31:58 -0600 Subject: [PATCH] fix: retain line breaks in tokens properly (#2341) * Fix lexer and tokenizer to retain line breaks properly * Add test for bug * Check for line breaks not just spaces * Fix lint * Fix spacing in test * clean up code Co-authored-by: Tony Brix --- src/Lexer.js | 6 +- src/Tokenizer.js | 32 ++++-- test/unit/Lexer-spec.js | 206 ++++++++++++++++++++++++++++----------- test/unit/marked-spec.js | 1 + 4 files changed, 177 insertions(+), 68 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index a214134b48..b1e8b0ba5b 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -152,7 +152,11 @@ export class Lexer { // newline if (token = this.tokenizer.space(src)) { src = src.substring(token.raw.length); - if (token.type) { + if (token.raw.length === 1 && tokens.length > 0) { + // if there's a single \n as a spacer, it's terminating the last line, + // so move it there so that we don't get unecessary paragraph tags + tokens[tokens.length - 1].raw += '\n'; + } else { tokens.push(token); } continue; diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 8ef871d606..eb42793c9b 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -72,14 +72,11 @@ export class Tokenizer { space(src) { const cap = this.rules.block.newline.exec(src); - if (cap) { - if (cap[0].length > 1) { - return { - type: 'space', - raw: cap[0] - }; - } - return { raw: '\n' }; + if (cap && cap[0].length > 0) { + return { + type: 'space', + raw: cap[0] + }; } } @@ -303,7 +300,24 @@ export class Tokenizer { for (i = 0; i < l; i++) { this.lexer.state.top = false; list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []); - if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) { + const spacers = list.items[i].tokens.filter(t => t.type === 'space'); + const hasMultipleLineBreaks = spacers.every(t => { + const chars = t.raw.split(''); + let lineBreaks = 0; + for (const char of chars) { + if (char === '\n') { + lineBreaks += 1; + } + if (lineBreaks > 1) { + return true; + } + } + + return false; + }); + + if (!list.loose && spacers.length && hasMultipleLineBreaks) { + // Having a single line break doesn't mean a list is loose. A single line break is terminating the last list item list.loose = true; list.items[i].loose = true; } diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 110b04d240..23913b6e4a 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -93,6 +93,10 @@ lheading 2 ---------- `, tokens: [ + { + type: 'space', + raw: '\n' + }, { type: 'heading', raw: '# heading 1\n\n', @@ -175,6 +179,9 @@ lheading 2 | 1 | 2 | `, tokens: [{ + type: 'space', + raw: '\n' + }, { type: 'table', align: [null, null], raw: '| a | b |\n|---|---|\n| 1 | 2 |\n', @@ -212,40 +219,42 @@ paragraph 1 |---|---| | 1 | 2 | `, - tokens: [ - { - type: 'paragraph', - raw: 'paragraph 1', - text: 'paragraph 1', - tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }] - }, - { - type: 'table', - align: [null, null], - raw: '| a | b |\n|---|---|\n| 1 | 2 |\n', - header: [ + tokens: [{ + type: 'space', + raw: '\n' + }, { + type: 'paragraph', + raw: 'paragraph 1\n', + text: 'paragraph 1', + tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }] + }, + { + type: 'table', + align: [null, null], + raw: '| a | b |\n|---|---|\n| 1 | 2 |\n', + header: [ + { + text: 'a', + tokens: [{ type: 'text', raw: 'a', text: 'a' }] + }, + { + text: 'b', + tokens: [{ type: 'text', raw: 'b', text: 'b' }] + } + ], + rows: [ + [ { - text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }] + text: '1', + tokens: [{ type: 'text', raw: '1', text: '1' }] }, { - text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }] + text: '2', + tokens: [{ type: 'text', raw: '2', text: '2' }] } - ], - rows: [ - [ - { - text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }] - }, - { - text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }] - } - ] ] - } + ] + } ] }); }); @@ -258,6 +267,9 @@ paragraph 1 | 1 | 2 | 3 | `, tokens: [{ + type: 'space', + raw: '\n' + }, { type: 'table', align: ['left', 'center', 'right'], raw: '| a | b | c |\n|:--|:-:|--:|\n| 1 | 2 | 3 |\n', @@ -302,33 +314,37 @@ a | b --|-- 1 | 2 `, - tokens: [{ - type: 'table', - align: [null, null], - raw: 'a | b\n--|--\n1 | 2\n', - header: [ - { - text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }] - }, - { - text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }] - } - ], - rows: [ - [ + tokens: [ + { + type: 'space', + raw: '\n' + }, { + type: 'table', + align: [null, null], + raw: 'a | b\n--|--\n1 | 2\n', + header: [ { - text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }] + text: 'a', + tokens: [{ type: 'text', raw: 'a', text: 'a' }] }, { - text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }] + text: 'b', + tokens: [{ type: 'text', raw: 'b', text: 'b' }] } + ], + rows: [ + [ + { + text: '1', + tokens: [{ type: 'text', raw: '1', text: '1' }] + }, + { + text: '2', + tokens: [{ type: 'text', raw: '2', text: '2' }] + } + ] ] - ] - }] + }] }); }); }); @@ -342,6 +358,19 @@ a | b ] }); }); + + it('after line break does not consume raw \n', () => { + expectTokens({ + md: 'T\nh\n---', + tokens: + jasmine.arrayContaining([ + jasmine.objectContaining({ + raw: 'T\nh\n' + }), + { type: 'hr', raw: '---' } + ]) + }); + }); }); describe('blockquote', () => { @@ -376,8 +405,11 @@ a | b `, tokens: [ { + type: 'space', + raw: '\n' + }, { type: 'list', - raw: '- item 1\n- item 2', + raw: '- item 1\n- item 2\n', ordered: false, start: '', loose: false, @@ -423,9 +455,13 @@ a | b 2. item 2 `, tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), jasmine.objectContaining({ type: 'list', - raw: '1. item 1\n2. item 2', + raw: '1. item 1\n2. item 2\n', ordered: true, start: 1, items: [ @@ -448,9 +484,13 @@ a | b 2) item 2 `, tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), jasmine.objectContaining({ type: 'list', - raw: '1) item 1\n2) item 2', + raw: '1) item 1\n2) item 2\n', ordered: true, start: 1, items: [ @@ -475,6 +515,10 @@ a | b paragraph `, tokens: [ + { + type: 'space', + raw: '\n' + }, { type: 'list', raw: '- item 1\n- item 2', @@ -515,7 +559,7 @@ paragraph { type: 'space', raw: '\n\n' }, { type: 'paragraph', - raw: 'paragraph', + raw: 'paragraph\n', text: 'paragraph', tokens: [{ type: 'text', @@ -534,9 +578,13 @@ paragraph 3. item 2 `, tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), jasmine.objectContaining({ type: 'list', - raw: '2. item 1\n3. item 2', + raw: '2. item 1\n3. item 2\n', ordered: true, start: 2, items: [ @@ -560,9 +608,13 @@ paragraph - item 2 `, tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), jasmine.objectContaining({ type: 'list', - raw: '- item 1\n\n- item 2', + raw: '- item 1\n\n- item 2\n', loose: true, items: [ jasmine.objectContaining({ @@ -577,6 +629,40 @@ paragraph }); }); + it('not loose with spaces', () => { + expectTokens({ + md: ` +- item 1 + - item 2 +`, + tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), + jasmine.objectContaining({ + type: 'list', + raw: '- item 1\n - item 2\n', + loose: false, + items: [ + jasmine.objectContaining({ + raw: '- item 1\n - item 2', + tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + raw: 'item 1\n' + }), + jasmine.objectContaining({ + type: 'list', + raw: '- item 2' + }) + ]) + }) + ] + }) + ]) + }); + }); + it('task', () => { expectTokens({ md: ` @@ -584,9 +670,13 @@ paragraph - [x] item 2 `, tokens: jasmine.arrayContaining([ + jasmine.objectContaining({ + type: 'space', + raw: '\n' + }), jasmine.objectContaining({ type: 'list', - raw: '- [ ] item 1\n- [x] item 2', + raw: '- [ ] item 1\n- [x] item 2\n', items: [ jasmine.objectContaining({ raw: '- [ ] item 1\n', diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js index 64be925806..e59b5cc497 100644 --- a/test/unit/marked-spec.js +++ b/test/unit/marked-spec.js @@ -994,6 +994,7 @@ br }); expect(tokensSeen).toEqual([ + ['space', ''], ['paragraph', 'paragraph'], ['text', 'paragraph'], ['space', ''],