diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bd027a19..7edc406a2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 0.19.0 (unreleased) +- Updates the pulldown-cmark dependency to v0.10.0. This improves footnote handling, and may also introduce some minor behavior changes such as reducing the amount of unnecessary HTML-escaping of text content. ## 0.18.0 (2023-12-18) diff --git a/Cargo.lock b/Cargo.lock index 26050e06e7..0d90eea7c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1736,7 +1736,8 @@ dependencies = [ "num-format", "once_cell", "percent-encoding", - "pulldown-cmark", + "pulldown-cmark 0.10.0", + "pulldown-cmark-escape", "quickxml_to_serde", "rayon", "regex", @@ -2775,6 +2776,24 @@ dependencies = [ "unicase", ] +[[package]] +name = "pulldown-cmark" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce76ce678ffc8e5675b22aa1405de0b7037e2fdf8913fea40d1926c6fe1e6e7" +dependencies = [ + "bitflags 2.4.1", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d8f9aa0e3cbcfaf8bf00300004ee3b72f74770f9cbac93f6928771f613276b" + [[package]] name = "pure-rust-locales" version = "0.7.0" @@ -3341,7 +3360,7 @@ dependencies = [ "cargo_metadata", "error-chain", "glob", - "pulldown-cmark", + "pulldown-cmark 0.9.3", "tempfile", "walkdir", ] diff --git a/components/libs/Cargo.toml b/components/libs/Cargo.toml index 5f551ea663..9f6abf1d58 100644 --- a/components/libs/Cargo.toml +++ b/components/libs/Cargo.toml @@ -21,7 +21,8 @@ nom-bibtex = "0.5" num-format = "0.4" once_cell = "1" percent-encoding = "2" -pulldown-cmark = { version = "0.9", default-features = false, features = ["simd"] } +pulldown-cmark = { version = "0.10", default-features = false, features = ["html", "simd"] } +pulldown-cmark-escape = { version = "0.10", default-features = false } quickxml_to_serde = "0.5" rayon = "1" regex = "1" diff --git a/components/libs/src/lib.rs b/components/libs/src/lib.rs index d0772dc0a4..3992c2ed29 100644 --- a/components/libs/src/lib.rs +++ b/components/libs/src/lib.rs @@ -23,6 +23,7 @@ pub use num_format; pub use once_cell; pub use percent_encoding; pub use pulldown_cmark; +pub use pulldown_cmark_escape; pub use quickxml_to_serde; pub use rayon; pub use regex; diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs index 4d30b2ac92..40c94afac5 100644 --- a/components/markdown/src/markdown.rs +++ b/components/markdown/src/markdown.rs @@ -4,19 +4,20 @@ use errors::bail; use libs::gh_emoji::Replacer as EmojiReplacer; use libs::once_cell::sync::Lazy; use libs::pulldown_cmark as cmark; +use libs::pulldown_cmark_escape as cmark_escape; use libs::tera; use utils::net::is_external_link; use crate::context::RenderContext; use errors::{Context, Error, Result}; -use libs::pulldown_cmark::escape::escape_html; +use libs::pulldown_cmark_escape::escape_html; use libs::regex::{Regex, RegexBuilder}; use utils::site::resolve_internal_link; use utils::slugs::slugify_anchors; use utils::table_of_contents::{make_table_of_contents, Heading}; use utils::types::InsertAnchor; -use self::cmark::{Event, LinkType, Options, Parser, Tag}; +use self::cmark::{Event, LinkType, Options, Parser, Tag, TagEnd}; use crate::codeblock::{CodeBlock, FenceSettings}; use crate::shortcode::{Shortcode, SHORTCODE_PLACEHOLDER}; @@ -220,15 +221,15 @@ fn get_heading_refs(events: &[Event]) -> Vec { for (i, event) in events.iter().enumerate() { match event { - Event::Start(Tag::Heading(level, anchor, classes)) => { + Event::Start(Tag::Heading { level, id, classes, .. }) => { heading_refs.push(HeadingRef::new( i, *level as u32, - anchor.map(|a| a.to_owned()), + id.clone().map(|a| a.to_string()), &classes.iter().map(|x| x.to_string()).collect::>(), )); } - Event::End(Tag::Heading(_, _, _)) => { + Event::End(TagEnd::Heading { .. }) => { heading_refs.last_mut().expect("Heading end before start?").end_idx = i; } _ => (), @@ -254,6 +255,10 @@ pub fn markdown_to_html( let mut error = None; let mut code_block: Option = None; + // Indicates whether we're in the middle of parsing a text node which will be placed in an HTML + // attribute, and which hence has to be escaped using escape_html rather than push_html's + // default HTML body escaping for text nodes. + let mut inside_attribute = false; let mut headings: Vec = vec![]; let mut internal_links = Vec::new(); @@ -294,12 +299,19 @@ pub fn markdown_to_html( // we have some text before the shortcode, push that first if $range.start != sc_span.start { - let content = $text[($range.start - orig_range_start) - ..(sc_span.start - orig_range_start)] - .to_string() - .into(); + let content: cmark::CowStr<'_> = + $text[($range.start - orig_range_start) + ..(sc_span.start - orig_range_start)] + .to_string() + .into(); events.push(if $is_text { - Event::Text(content) + if inside_attribute { + let mut buffer = "".to_string(); + escape_html(&mut buffer, content.as_ref()).unwrap(); + Event::Html(buffer.into()) + } else { + Event::Text(content) + } } else { Event::Html(content) }); @@ -370,7 +382,13 @@ pub fn markdown_to_html( }; if !contains_shortcode(text.as_ref()) { - events.push(Event::Text(text)); + if inside_attribute { + let mut buffer = "".to_string(); + escape_html(&mut buffer, text.as_ref()).unwrap(); + events.push(Event::Html(buffer.into())); + } else { + events.push(Event::Text(text)); + } continue; } @@ -386,7 +404,7 @@ pub fn markdown_to_html( code_block = Some(block); events.push(Event::Html(begin.into())); } - Event::End(Tag::CodeBlock(_)) => { + Event::End(TagEnd::CodeBlock { .. }) => { if let Some(ref mut code_block) = code_block { let html = code_block.highlight(&accumulated_block); events.push(Event::Html(html.into())); @@ -397,44 +415,53 @@ pub fn markdown_to_html( code_block = None; events.push(Event::Html("\n".into())); } - Event::Start(Tag::Image(link_type, src, title)) => { - let link = if is_colocated_asset_link(&src) { - let link = format!("{}{}", context.current_page_permalink, &*src); + Event::Start(Tag::Image { link_type, dest_url, title, id }) => { + let link = if is_colocated_asset_link(&dest_url) { + let link = format!("{}{}", context.current_page_permalink, &*dest_url); link.into() } else { - src + dest_url }; events.push(if lazy_async_image { let mut img_before_alt: String = "\"").expect("Could events.push(if lazy_async_image { + Event::End(TagEnd::Image) => events.push(if lazy_async_image { Event::Html("\" loading=\"lazy\" decoding=\"async\" />".into()) } else { event }), - Event::Start(Tag::Link(link_type, link, title)) if link.is_empty() => { + Event::Start(Tag::Link { link_type, dest_url, title, id }) + if dest_url.is_empty() => + { error = Some(Error::msg("There is a link that is missing a URL")); - events.push(Event::Start(Tag::Link(link_type, "#".into(), title))); + events.push(Event::Start(Tag::Link { + link_type, + dest_url: "#".into(), + title, + id, + })); } - Event::Start(Tag::Link(link_type, link, title)) => { + Event::Start(Tag::Link { link_type, dest_url, title, id }) => { let fixed_link = match fix_link( link_type, - &link, + &dest_url, context, &mut internal_links, &mut external_links, @@ -448,12 +475,12 @@ pub fn markdown_to_html( }; events.push( - if is_external_link(&link) + if is_external_link(&dest_url) && context.config.markdown.has_external_link_tweaks() { let mut escaped = String::new(); // write_str can fail but here there are no reasons it should (afaik?) - cmark::escape::escape_href(&mut escaped, &link) + cmark_escape::escape_href(&mut escaped, &dest_url) .expect("Could not write to buffer"); Event::Html( context @@ -463,7 +490,12 @@ pub fn markdown_to_html( .into(), ) } else { - Event::Start(Tag::Link(link_type, fixed_link.into(), title)) + Event::Start(Tag::Link { + link_type, + dest_url: fixed_link.into(), + title, + id, + }) }, ) } @@ -485,7 +517,7 @@ pub fn markdown_to_html( events.push(event); } - Event::End(Tag::Paragraph) => { + Event::End(TagEnd::Paragraph) => { events.push(if stop_next_end_p { stop_next_end_p = false; Event::Html("".into()) diff --git a/components/markdown/tests/snapshots/markdown__all_markdown_features_integration.snap b/components/markdown/tests/snapshots/markdown__all_markdown_features_integration.snap index 72aedc4c21..9e95d41805 100644 --- a/components/markdown/tests/snapshots/markdown__all_markdown_features_integration.snap +++ b/components/markdown/tests/snapshots/markdown__all_markdown_features_integration.snap @@ -1,8 +1,6 @@ --- -source: components/rendering/tests/markdown.rs -assertion_line: 358 +source: components/markdown/tests/markdown.rs expression: body - ---

h1 Heading

@@ -83,7 +81,7 @@ line 1 of code line 2 of code line 3 of code -

Block code "fences"

+

Block code "fences"

Sample text here...
 

Syntax highlighting

diff --git a/components/markdown/tests/snapshots/markdown__can_handle_heading_ids-2.snap b/components/markdown/tests/snapshots/markdown__can_handle_heading_ids-2.snap index d2847a8e94..e9fabcc4fa 100644 --- a/components/markdown/tests/snapshots/markdown__can_handle_heading_ids-2.snap +++ b/components/markdown/tests/snapshots/markdown__can_handle_heading_ids-2.snap @@ -1,12 +1,10 @@ --- -source: components/rendering/tests/markdown.rs -assertion_line: 84 +source: components/markdown/tests/markdown.rs expression: body - ---

Hello

Hello

-

L'écologie et vous

+

L'écologie et vous

Hello

Hello

Hello

@@ -22,6 +20,6 @@ expression: body

text 1 there

1

footnote

-

Classes

+

Classes

diff --git a/components/markdown/tests/snapshots/markdown__can_handle_heading_ids.snap b/components/markdown/tests/snapshots/markdown__can_handle_heading_ids.snap index 87cce483ae..663be5947f 100644 --- a/components/markdown/tests/snapshots/markdown__can_handle_heading_ids.snap +++ b/components/markdown/tests/snapshots/markdown__can_handle_heading_ids.snap @@ -1,8 +1,6 @@ --- -source: components/rendering/tests/markdown.rs -assertion_line: 79 +source: components/markdown/tests/markdown.rs expression: body - ---

Hello

Hello

@@ -22,6 +20,6 @@ expression: body

text 1 there

1

footnote

-

Classes

+

Classes

diff --git a/components/markdown/tests/snapshots/shortcodes__doesnt_render_ignored_shortcodes.snap b/components/markdown/tests/snapshots/shortcodes__doesnt_render_ignored_shortcodes.snap index 64d9bfbabe..15c94e4678 100644 --- a/components/markdown/tests/snapshots/shortcodes__doesnt_render_ignored_shortcodes.snap +++ b/components/markdown/tests/snapshots/shortcodes__doesnt_render_ignored_shortcodes.snap @@ -1,8 +1,6 @@ --- -source: components/rendering/tests/shortcodes.rs -assertion_line: 104 +source: components/markdown/tests/shortcodes.rs expression: body - --- -

{{ youtube(id="w7Ft2ymGmfc") }}

+

{{ youtube(id="w7Ft2ymGmfc") }}