From 8a9573d0b1b6095248ba2869c63da8399d31acfb Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 4 Oct 2024 12:39:57 -0700 Subject: [PATCH 1/3] feat!: round-trip indented code blocks This raises the number of passing spec tests from 459 to 473. --- src/lib.rs | 52 ++++++++++++++----- src/source_range.rs | 6 +-- tests/display.rs | 4 +- .../snapshots/stupicat-indented-code-block | 8 ++- .../snapshots/stupicat-lists-nested-output | 5 +- tests/fmt.rs | 30 ++++++++++- 6 files changed, 77 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 200a7b7..203b4ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ use std::{ }; use pulldown_cmark::{ - Alignment as TableAlignment, BlockQuoteKind, Event, HeadingLevel, LinkType, MetadataBlockKind, Tag, TagEnd, + Alignment as TableAlignment, BlockQuoteKind, Event, HeadingLevel, LinkType, MetadataBlockKind, Tag, TagEnd }; mod source_range; @@ -41,6 +41,12 @@ impl<'a> From<&'a TableAlignment> for Alignment { } } +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CodeBlockKind { + Indented, + Fenced, +} + /// The state of the [`cmark_resume()`] and [`cmark_resume_with_options()`] functions. /// This does not only allow introspection, but enables the user /// to halt the serialization at any time, and resume it later. @@ -60,7 +66,7 @@ pub struct State<'a> { /// The last seen text when serializing a header pub text_for_header: Option, /// Is set while we are handling text in a code block - pub is_in_code_block: bool, + pub code_block: Option, /// True if the last event was text and the text does not have trailing newline. Used to inject additional newlines before code block end fence. pub last_was_text_without_trailing_newline: bool, /// True if the last event was a paragraph start. Used to escape spaces at start of line (prevent spurrious indented code). @@ -83,6 +89,12 @@ pub struct State<'a> { pub last_event_end_index: usize, } +impl State<'_> { + pub fn is_in_code_block(&self) -> bool { + self.code_block.is_some() + } +} + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum LinkCategory<'a> { AngleBracketed, @@ -232,7 +244,7 @@ where E: Borrow>, F: fmt::Write, { - use pulldown_cmark::{CodeBlockKind, Event::*, Tag::*}; + use pulldown_cmark::{Event::*, Tag::*}; let last_was_text_without_trailing_newline = state.last_was_text_without_trailing_newline; state.last_was_text_without_trailing_newline = false; @@ -446,15 +458,19 @@ where formatter.write_char('\n').and(padding(formatter, &state.padding)) } } - CodeBlock(CodeBlockKind::Indented) => { - state.is_in_code_block = true; - for _ in 0..options.code_block_token_count { - formatter.write_char(options.code_block_token)?; + CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => { + state.code_block = Some(CodeBlockKind::Indented); + state.padding.push(" ".into()); + if consumed_newlines { + formatter.write_str(" ") + } else { + formatter + .write_char('\n') + .and_then(|()| padding(formatter, &state.padding)) } - formatter.write_char('\n').and(padding(formatter, &state.padding)) } - CodeBlock(CodeBlockKind::Fenced(info)) => { - state.is_in_code_block = true; + CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(info)) => { + state.code_block = Some(CodeBlockKind::Fenced); let s = if consumed_newlines { Ok(()) } else { @@ -592,13 +608,21 @@ where if state.newlines_before_start < options.newlines_after_codeblock { state.newlines_before_start = options.newlines_after_codeblock; } - state.is_in_code_block = false; if last_was_text_without_trailing_newline { formatter.write_char('\n')?; } - for _ in 0..options.code_block_token_count { - formatter.write_char(options.code_block_token)?; + match state.code_block { + Some(CodeBlockKind::Fenced) => { + for _ in 0..options.code_block_token_count { + formatter.write_char(options.code_block_token)?; + } + } + Some(CodeBlockKind::Indented) => { + state.padding.pop(); + } + None => {} } + state.code_block = None; Ok(()) } TagEnd::HtmlBlock => { @@ -728,7 +752,7 @@ where } state.last_was_text_without_trailing_newline = !text.ends_with('\n'); print_text_without_trailing_newline( - &escape_leading_special_characters(text, state.is_in_code_block, options), + &escape_leading_special_characters(text, state.is_in_code_block(), options), formatter, &state.padding, ) diff --git a/src/source_range.rs b/src/source_range.rs index 858bf31..512b44f 100644 --- a/src/source_range.rs +++ b/src/source_range.rs @@ -48,17 +48,17 @@ where source.as_bytes().get(range.start.saturating_sub(1)) != Some(&b'\\') } _ => false, - } && !state.is_in_code_block; + } && !state.is_in_code_block(); if prevent_escape_leading_special_characters { // Hack to not escape leading special characters. - state.is_in_code_block = true; + state.code_block = Some(crate::CodeBlockKind::Fenced); } cmark_resume_one_event(event, &mut formatter, &mut state, &options)?; if prevent_escape_leading_special_characters { // Assumption: this case only happens when `event` is `Text`, // so `state.is_in_code_block` should not be changed to `true`. // Also, `state.is_in_code_block` was `false`. - state.is_in_code_block = false; + state.code_block = None; } if let (true, Some(range)) = (update_event_end_index, range) { diff --git a/tests/display.rs b/tests/display.rs index b83ca4c..3bf45b4 100644 --- a/tests/display.rs +++ b/tests/display.rs @@ -192,7 +192,7 @@ mod start { } mod end { - use pulldown_cmark::{Event::*, HeadingLevel, LinkType::*, Tag, TagEnd}; + use pulldown_cmark::{CodeBlockKind, Event::*, HeadingLevel, LinkType::*, Tag, TagEnd}; use super::{es, s}; @@ -216,7 +216,7 @@ mod end { } #[test] fn codeblock() { - assert_eq!(s(End(TagEnd::CodeBlock)), "````"); + assert_eq!(es([Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), End(TagEnd::CodeBlock)]), "\n````\n````"); } #[test] fn footnote_definition() { diff --git a/tests/fixtures/snapshots/stupicat-indented-code-block b/tests/fixtures/snapshots/stupicat-indented-code-block index 5ffaade..596ceb0 100644 --- a/tests/fixtures/snapshots/stupicat-indented-code-block +++ b/tests/fixtures/snapshots/stupicat-indented-code-block @@ -1,7 +1,5 @@ codeblock: -```` -fn main() { - println!("Hello, world!"); -} -```` \ No newline at end of file + fn main() { + println!("Hello, world!"); + } \ No newline at end of file diff --git a/tests/fixtures/snapshots/stupicat-lists-nested-output b/tests/fixtures/snapshots/stupicat-lists-nested-output index 7741911..6d3ee62 100644 --- a/tests/fixtures/snapshots/stupicat-lists-nested-output +++ b/tests/fixtures/snapshots/stupicat-lists-nested-output @@ -19,8 +19,7 @@ 1. list paragraph 1 - ```` - code sample - ```` + code sample + 1. list paragraph 2 \ No newline at end of file diff --git a/tests/fmt.rs b/tests/fmt.rs index 6a67637..173cd1f 100644 --- a/tests/fmt.rs +++ b/tests/fmt.rs @@ -791,7 +791,7 @@ mod codeblock { assert_eq!( fmte(&[Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced("s".into()))),]).1, State { - is_in_code_block: true, + code_block: Some(pulldown_cmark_to_cmark::CodeBlockKind::Fenced), ..Default::default() } ); @@ -865,6 +865,34 @@ mod codeblock { assert_eq!(s, "\n~~~~hi\nsome\ntext\n~~~~".to_string()); } + + #[test] + fn indented() { + assert_eq!( + fmts_both(" first\n second\nthird"), + ( + "\n first\n second\n \n\nthird".into(), + State { + newlines_before_start: 2, + ..Default::default() + } + ) + ); + } + + #[test] + fn html_indented() { + assert_eq!( + fmts_both(" \n\n "), + ( + " \n\n \n".into(), + State { + newlines_before_start: 2, + ..Default::default() + } + ) + ); + } } mod table { From db767944612b0e0068d7872ec515e3f0c23865b4 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 4 Oct 2024 12:57:37 -0700 Subject: [PATCH 2/3] feat!: correctly round-trip code blocks with no trailing newline --- src/lib.rs | 1 + tests/display.rs | 30 +++++++++++++++++++ .../snapshots/stupicat-indented-code-block | 3 +- tests/fmt.rs | 2 +- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 203b4ea..13e51f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -610,6 +610,7 @@ where } if last_was_text_without_trailing_newline { formatter.write_char('\n')?; + padding(formatter, &state.padding)?; } match state.code_block { Some(CodeBlockKind::Fenced) => { diff --git a/tests/display.rs b/tests/display.rs index 3bf45b4..7045222 100644 --- a/tests/display.rs +++ b/tests/display.rs @@ -219,6 +219,36 @@ mod end { assert_eq!(es([Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), End(TagEnd::CodeBlock)]), "\n````\n````"); } #[test] + fn codeblock_in_list_item() { + assert_eq!(es([ + Start(Tag::List(None)), + Start(Tag::Item), + Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), + Text("foo".into()), + End(TagEnd::CodeBlock), + End(TagEnd::Item), + End(TagEnd::List(false)), + Start(Tag::Paragraph), + Text("bar".into()), + End(TagEnd::Paragraph), + ]), "* \n ````\n foo\n ````\n\nbar"); + } + #[test] + fn codeblock_indented_in_list_item() { + assert_eq!(es([ + Start(Tag::List(None)), + Start(Tag::Item), + Start(Tag::CodeBlock(CodeBlockKind::Indented)), + Text("foo".into()), + End(TagEnd::CodeBlock), + End(TagEnd::Item), + End(TagEnd::List(false)), + Start(Tag::Paragraph), + Text("bar".into()), + End(TagEnd::Paragraph), + ]), "* \n foo\n \n\nbar"); + } + #[test] fn footnote_definition() { assert_eq!(s(End(TagEnd::FootnoteDefinition)), ""); } diff --git a/tests/fixtures/snapshots/stupicat-indented-code-block b/tests/fixtures/snapshots/stupicat-indented-code-block index 596ceb0..85c74a7 100644 --- a/tests/fixtures/snapshots/stupicat-indented-code-block +++ b/tests/fixtures/snapshots/stupicat-indented-code-block @@ -2,4 +2,5 @@ codeblock: fn main() { println!("Hello, world!"); - } \ No newline at end of file + } + \ No newline at end of file diff --git a/tests/fmt.rs b/tests/fmt.rs index 173cd1f..5fc94e5 100644 --- a/tests/fmt.rs +++ b/tests/fmt.rs @@ -885,7 +885,7 @@ mod codeblock { assert_eq!( fmts_both(" \n\n "), ( - " \n\n \n".into(), + " \n\n \n ".into(), State { newlines_before_start: 2, ..Default::default() From 3fdf308560c7db1046bdca82ccd316cf16082ff5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 5 Oct 2024 09:23:21 +0200 Subject: [PATCH 3/3] Run cargo fmt and assure it doesn't regress anymore. --- .github/workflows/rust.yml | 11 ++-- src/lib.rs | 102 +++++++++++++++++++++++-------------- src/source_range.rs | 2 +- tests/display.rs | 62 +++++++++++++--------- 4 files changed, 108 insertions(+), 69 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index cc8df87..02953cd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -10,10 +10,13 @@ jobs: build-and-test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: tests - run: | - make tests + - uses: actions/checkout@v2 + - name: cargo fmt + run: | + cargo fmt --check + - name: tests + run: | + make tests msrv: runs-on: ubuntu-latest diff --git a/src/lib.rs b/src/lib.rs index 13e51f9..4c4f5c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ use std::{ }; use pulldown_cmark::{ - Alignment as TableAlignment, BlockQuoteKind, Event, HeadingLevel, LinkType, MetadataBlockKind, Tag, TagEnd + Alignment as TableAlignment, BlockQuoteKind, Event, HeadingLevel, LinkType, MetadataBlockKind, Tag, TagEnd, }; mod source_range; @@ -98,18 +98,44 @@ impl State<'_> { #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum LinkCategory<'a> { AngleBracketed, - Reference { uri: Cow<'a, str>, title: Cow<'a, str>, id: Cow<'a, str> }, - Collapsed { uri: Cow<'a, str>, title: Cow<'a, str> }, - Shortcut { uri: Cow<'a, str>, title: Cow<'a, str> }, - Other { uri: Cow<'a, str>, title: Cow<'a, str> }, + Reference { + uri: Cow<'a, str>, + title: Cow<'a, str>, + id: Cow<'a, str>, + }, + Collapsed { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, + Shortcut { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, + Other { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum ImageLink<'a> { - Reference { uri: Cow<'a, str>, title: Cow<'a, str>, id: Cow<'a, str> }, - Collapsed { uri: Cow<'a, str>, title: Cow<'a, str> }, - Shortcut { uri: Cow<'a, str>, title: Cow<'a, str> }, - Other { uri: Cow<'a, str>, title: Cow<'a, str> }, + Reference { + uri: Cow<'a, str>, + title: Cow<'a, str>, + id: Cow<'a, str>, + }, + Collapsed { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, + Shortcut { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, + Other { + uri: Cow<'a, str>, + title: Cow<'a, str>, + }, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -310,7 +336,7 @@ where } None => Ok(()), } - }, + } Table(alignments) => { state.table_alignments = alignments.iter().map(From::from).collect(); Ok(()) @@ -384,14 +410,14 @@ where uri: dest_url.clone().into(), title: title.clone().into(), } - }, + } LinkType::Shortcut => { state.current_shortcut_text = Some(String::new()); ImageLink::Shortcut { uri: dest_url.clone().into(), title: title.clone().into(), } - }, + } _ => ImageLink::Other { uri: dest_url.clone().into(), title: title.clone().into(), @@ -408,7 +434,7 @@ where Paragraph => { state.last_was_paragraph_start = true; Ok(()) - }, + } Heading { level, id, @@ -528,37 +554,35 @@ where } LinkCategory::Other { uri, title } => close_link(&uri, &title, formatter, LinkType::Inline), }, - TagEnd::Image => { - match state.image_stack.pop().unwrap() { - ImageLink::Reference { uri, title, id } => { + TagEnd::Image => match state.image_stack.pop().unwrap() { + ImageLink::Reference { uri, title, id } => { + state + .shortcuts + .push((id.to_string(), uri.to_string(), title.to_string())); + formatter.write_str("][")?; + formatter.write_str(&id)?; + formatter.write_char(']') + } + ImageLink::Collapsed { uri, title } => { + if let Some(shortcut_text) = state.current_shortcut_text.take() { state .shortcuts - .push((id.to_string(), uri.to_string(), title.to_string())); - formatter.write_str("][")?; - formatter.write_str(&id)?; - formatter.write_char(']') - } - ImageLink::Collapsed { uri, title } => { - if let Some(shortcut_text) = state.current_shortcut_text.take() { - state - .shortcuts - .push((shortcut_text, uri.to_string(), title.to_string())); - } - formatter.write_str("][]") - } - ImageLink::Shortcut { uri, title } => { - if let Some(shortcut_text) = state.current_shortcut_text.take() { - state - .shortcuts - .push((shortcut_text, uri.to_string(), title.to_string())); - } - formatter.write_char(']') + .push((shortcut_text, uri.to_string(), title.to_string())); } - ImageLink::Other { uri, title } => { - close_link(uri.as_ref(), title.as_ref(), formatter, LinkType::Inline) + formatter.write_str("][]") + } + ImageLink::Shortcut { uri, title } => { + if let Some(shortcut_text) = state.current_shortcut_text.take() { + state + .shortcuts + .push((shortcut_text, uri.to_string(), title.to_string())); } + formatter.write_char(']') } - } + ImageLink::Other { uri, title } => { + close_link(uri.as_ref(), title.as_ref(), formatter, LinkType::Inline) + } + }, TagEnd::Emphasis => formatter.write_char(options.emphasis_token), TagEnd::Strong => formatter.write_str(options.strong_token), TagEnd::Heading(_) => { diff --git a/src/source_range.rs b/src/source_range.rs index 512b44f..d52a1b0 100644 --- a/src/source_range.rs +++ b/src/source_range.rs @@ -11,7 +11,7 @@ use super::{cmark_resume_one_event, fmt, Borrow, Event, Options, Range, State}; /// * Markdown source from which `event_and_ranges` are created. /// 1. **event_and_ranges** /// * An iterator over [`Event`]-range pairs, for example as returned by [`pulldown_cmark::OffsetIter`]. -/// Must match what's provided in `source`. +/// Must match what's provided in `source`. /// 1. **formatter** /// * A format writer, can be a `String`. /// 1. **state** diff --git a/tests/display.rs b/tests/display.rs index 7045222..15156b1 100644 --- a/tests/display.rs +++ b/tests/display.rs @@ -216,37 +216,49 @@ mod end { } #[test] fn codeblock() { - assert_eq!(es([Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), End(TagEnd::CodeBlock)]), "\n````\n````"); + assert_eq!( + es([ + Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), + End(TagEnd::CodeBlock) + ]), + "\n````\n````" + ); } #[test] fn codeblock_in_list_item() { - assert_eq!(es([ - Start(Tag::List(None)), - Start(Tag::Item), - Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), - Text("foo".into()), - End(TagEnd::CodeBlock), - End(TagEnd::Item), - End(TagEnd::List(false)), - Start(Tag::Paragraph), - Text("bar".into()), - End(TagEnd::Paragraph), - ]), "* \n ````\n foo\n ````\n\nbar"); + assert_eq!( + es([ + Start(Tag::List(None)), + Start(Tag::Item), + Start(Tag::CodeBlock(CodeBlockKind::Fenced("".into()))), + Text("foo".into()), + End(TagEnd::CodeBlock), + End(TagEnd::Item), + End(TagEnd::List(false)), + Start(Tag::Paragraph), + Text("bar".into()), + End(TagEnd::Paragraph), + ]), + "* \n ````\n foo\n ````\n\nbar" + ); } #[test] fn codeblock_indented_in_list_item() { - assert_eq!(es([ - Start(Tag::List(None)), - Start(Tag::Item), - Start(Tag::CodeBlock(CodeBlockKind::Indented)), - Text("foo".into()), - End(TagEnd::CodeBlock), - End(TagEnd::Item), - End(TagEnd::List(false)), - Start(Tag::Paragraph), - Text("bar".into()), - End(TagEnd::Paragraph), - ]), "* \n foo\n \n\nbar"); + assert_eq!( + es([ + Start(Tag::List(None)), + Start(Tag::Item), + Start(Tag::CodeBlock(CodeBlockKind::Indented)), + Text("foo".into()), + End(TagEnd::CodeBlock), + End(TagEnd::Item), + End(TagEnd::List(false)), + Start(Tag::Paragraph), + Text("bar".into()), + End(TagEnd::Paragraph), + ]), + "* \n foo\n \n\nbar" + ); } #[test] fn footnote_definition() {