Skip to content

Commit

Permalink
Update pulldown_cmark dep to v0.10, and add pulldown_cmark_escape dep.
Browse files Browse the repository at this point in the history
The pulldown_cmark escaping functionality is now shipped in a separate
pulldown_cmark_escape crate
(https://crates.io/crates/pulldown-cmark-escape), starting with v0.10.0.

The markdown.rs module has to be adapted to a few API changes in
pulldown_cmark, and we have to introduce explicit handling of <img> alt
text to ensure it continues to be properly escaped.

There are also a few other behavior changes that are caught by the
tests, but these actually seem to be desired, so I've updated the insta
snapshot files for those tests to incorporate those changes.
Specifically, one footnote-parsing case seems to be handled better now,
and pulldown-cmark's `push_html` no longer escapes quotes in text nodes
(see pulldown-cmark/pulldown-cmark#836).
  • Loading branch information
timonvo committed Feb 16, 2024
1 parent 73e06bd commit 19c6fac
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 48 deletions.
23 changes: 21 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion components/libs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ nom-bibtex = "0.5"
num-format = "0.4"
once_cell = "1"
percent-encoding = "2"
pulldown-cmark = { version = "0.9", default-features = false, features = ["simd"] }
pulldown-cmark = { version = "0.10", default-features = false, features = ["html", "simd"] }
pulldown-cmark-escape = { version = "0.10", default-features = false }
quickxml_to_serde = "0.5"
rayon = "1"
regex = "1"
Expand Down
1 change: 1 addition & 0 deletions components/libs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub use num_format;
pub use once_cell;
pub use percent_encoding;
pub use pulldown_cmark;
pub use pulldown_cmark_escape;
pub use quickxml_to_serde;
pub use rayon;
pub use regex;
Expand Down
88 changes: 60 additions & 28 deletions components/markdown/src/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@ use errors::bail;
use libs::gh_emoji::Replacer as EmojiReplacer;
use libs::once_cell::sync::Lazy;
use libs::pulldown_cmark as cmark;
use libs::pulldown_cmark_escape as cmark_escape;
use libs::tera;
use utils::net::is_external_link;

use crate::context::RenderContext;
use errors::{Context, Error, Result};
use libs::pulldown_cmark::escape::escape_html;
use libs::pulldown_cmark_escape::escape_html;
use libs::regex::{Regex, RegexBuilder};
use utils::site::resolve_internal_link;
use utils::slugs::slugify_anchors;
use utils::table_of_contents::{make_table_of_contents, Heading};
use utils::types::InsertAnchor;

use self::cmark::{Event, LinkType, Options, Parser, Tag};
use self::cmark::{Event, LinkType, Options, Parser, Tag, TagEnd};
use crate::codeblock::{CodeBlock, FenceSettings};
use crate::shortcode::{Shortcode, SHORTCODE_PLACEHOLDER};

Expand Down Expand Up @@ -220,15 +221,15 @@ fn get_heading_refs(events: &[Event]) -> Vec<HeadingRef> {

for (i, event) in events.iter().enumerate() {
match event {
Event::Start(Tag::Heading(level, anchor, classes)) => {
Event::Start(Tag::Heading { level, id, classes, .. }) => {
heading_refs.push(HeadingRef::new(
i,
*level as u32,
anchor.map(|a| a.to_owned()),
id.clone().map(|a| a.to_string()),
&classes.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
));
}
Event::End(Tag::Heading(_, _, _)) => {
Event::End(TagEnd::Heading { .. }) => {
heading_refs.last_mut().expect("Heading end before start?").end_idx = i;
}
_ => (),
Expand All @@ -254,6 +255,10 @@ pub fn markdown_to_html(
let mut error = None;

let mut code_block: Option<CodeBlock> = None;
// Indicates whether we're in the middle of parsing a text node which will be placed in an HTML
// attribute, and which hence has to be escaped using escape_html rather than push_html's
// default HTML body escaping for text nodes.
let mut inside_attribute = false;

let mut headings: Vec<Heading> = vec![];
let mut internal_links = Vec::new();
Expand Down Expand Up @@ -294,12 +299,19 @@ pub fn markdown_to_html(

// we have some text before the shortcode, push that first
if $range.start != sc_span.start {
let content = $text[($range.start - orig_range_start)
..(sc_span.start - orig_range_start)]
.to_string()
.into();
let content: cmark::CowStr<'_> =
$text[($range.start - orig_range_start)
..(sc_span.start - orig_range_start)]
.to_string()
.into();
events.push(if $is_text {
Event::Text(content)
if inside_attribute {
let mut buffer = "".to_string();
escape_html(&mut buffer, content.as_ref()).unwrap();
Event::Html(buffer.into())
} else {
Event::Text(content)
}
} else {
Event::Html(content)
});
Expand Down Expand Up @@ -370,7 +382,13 @@ pub fn markdown_to_html(
};

if !contains_shortcode(text.as_ref()) {
events.push(Event::Text(text));
if inside_attribute {
let mut buffer = "".to_string();
escape_html(&mut buffer, text.as_ref()).unwrap();
events.push(Event::Html(buffer.into()));
} else {
events.push(Event::Text(text));
}
continue;
}

Expand All @@ -386,7 +404,7 @@ pub fn markdown_to_html(
code_block = Some(block);
events.push(Event::Html(begin.into()));
}
Event::End(Tag::CodeBlock(_)) => {
Event::End(TagEnd::CodeBlock { .. }) => {
if let Some(ref mut code_block) = code_block {
let html = code_block.highlight(&accumulated_block);
events.push(Event::Html(html.into()));
Expand All @@ -397,44 +415,53 @@ pub fn markdown_to_html(
code_block = None;
events.push(Event::Html("</code></pre>\n".into()));
}
Event::Start(Tag::Image(link_type, src, title)) => {
let link = if is_colocated_asset_link(&src) {
let link = format!("{}{}", context.current_page_permalink, &*src);
Event::Start(Tag::Image { link_type, dest_url, title, id }) => {
let link = if is_colocated_asset_link(&dest_url) {
let link = format!("{}{}", context.current_page_permalink, &*dest_url);
link.into()
} else {
src
dest_url
};

events.push(if lazy_async_image {
let mut img_before_alt: String = "<img src=\"".to_string();
cmark::escape::escape_href(&mut img_before_alt, &link)
cmark_escape::escape_href(&mut img_before_alt, &link)
.expect("Could not write to buffer");
if !title.is_empty() {
img_before_alt
.write_str("\" title=\"")
.expect("Could not write to buffer");
cmark::escape::escape_href(&mut img_before_alt, &title)
cmark_escape::escape_href(&mut img_before_alt, &title)
.expect("Could not write to buffer");
}
img_before_alt.write_str("\" alt=\"").expect("Could not write to buffer");
inside_attribute = true;
Event::Html(img_before_alt.into())
} else {
Event::Start(Tag::Image(link_type, link, title))
inside_attribute = false;
Event::Start(Tag::Image { link_type, dest_url: link, title, id })
});
}
Event::End(Tag::Image(..)) => events.push(if lazy_async_image {
Event::End(TagEnd::Image) => events.push(if lazy_async_image {
Event::Html("\" loading=\"lazy\" decoding=\"async\" />".into())
} else {
event
}),
Event::Start(Tag::Link(link_type, link, title)) if link.is_empty() => {
Event::Start(Tag::Link { link_type, dest_url, title, id })
if dest_url.is_empty() =>
{
error = Some(Error::msg("There is a link that is missing a URL"));
events.push(Event::Start(Tag::Link(link_type, "#".into(), title)));
events.push(Event::Start(Tag::Link {
link_type,
dest_url: "#".into(),
title,
id,
}));
}
Event::Start(Tag::Link(link_type, link, title)) => {
Event::Start(Tag::Link { link_type, dest_url, title, id }) => {
let fixed_link = match fix_link(
link_type,
&link,
&dest_url,
context,
&mut internal_links,
&mut external_links,
Expand All @@ -448,12 +475,12 @@ pub fn markdown_to_html(
};

events.push(
if is_external_link(&link)
if is_external_link(&dest_url)
&& context.config.markdown.has_external_link_tweaks()
{
let mut escaped = String::new();
// write_str can fail but here there are no reasons it should (afaik?)
cmark::escape::escape_href(&mut escaped, &link)
cmark_escape::escape_href(&mut escaped, &dest_url)
.expect("Could not write to buffer");
Event::Html(
context
Expand All @@ -463,7 +490,12 @@ pub fn markdown_to_html(
.into(),
)
} else {
Event::Start(Tag::Link(link_type, fixed_link.into(), title))
Event::Start(Tag::Link {
link_type,
dest_url: fixed_link.into(),
title,
id,
})
},
)
}
Expand All @@ -485,7 +517,7 @@ pub fn markdown_to_html(

events.push(event);
}
Event::End(Tag::Paragraph) => {
Event::End(TagEnd::Paragraph) => {
events.push(if stop_next_end_p {
stop_next_end_p = false;
Event::Html("".into())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
---
source: components/rendering/tests/markdown.rs
assertion_line: 358
source: components/markdown/tests/markdown.rs
expression: body

---
<!-- Adapted from https://markdown-it.github.io/ -->
<h1 id="h1-heading">h1 Heading</h1>
Expand Down Expand Up @@ -83,7 +81,7 @@ line 1 of code
line 2 of code
line 3 of code
</code></pre>
<p>Block code &quot;fences&quot;</p>
<p>Block code "fences"</p>
<pre><code>Sample text here...
</code></pre>
<p>Syntax highlighting</p>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
---
source: components/rendering/tests/markdown.rs
assertion_line: 84
source: components/markdown/tests/markdown.rs
expression: body

---
<h1 id="Hello">Hello</h1>
<h1 id="Hello-1">Hello</h1>
<h1 id="L'écologie_et_vous">L'écologie et vous</h1>
<h1 id="L&#39;écologie_et_vous">L'écologie et vous</h1>
<h1 id="hello">Hello</h1>
<h1 id="hello">Hello</h1>
<h1 id="Something_else">Hello</h1>
Expand All @@ -22,6 +20,6 @@ expression: body
<h1 id="text__there">text <sup class="footnote-reference"><a href="#1">1</a></sup> there</h1>
<div class="footnote-definition" id="1"><sup class="footnote-definition-label">1</sup>
<p>footnote</p>
<h1 id="classes" class="bold another">Classes</h1>
</div>
<h1 id="classes" class="bold another">Classes</h1>

Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
---
source: components/rendering/tests/markdown.rs
assertion_line: 79
source: components/markdown/tests/markdown.rs
expression: body

---
<h1 id="hello-1">Hello</h1>
<h1 id="hello-2">Hello</h1>
Expand All @@ -22,6 +20,6 @@ expression: body
<h1 id="text-there">text <sup class="footnote-reference"><a href="#1">1</a></sup> there</h1>
<div class="footnote-definition" id="1"><sup class="footnote-definition-label">1</sup>
<p>footnote</p>
<h1 id="classes" class="bold another">Classes</h1>
</div>
<h1 id="classes" class="bold another">Classes</h1>

Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
---
source: components/rendering/tests/shortcodes.rs
assertion_line: 104
source: components/markdown/tests/shortcodes.rs
expression: body

---
<p>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</p>
<p>{{ youtube(id="w7Ft2ymGmfc") }}</p>

0 comments on commit 19c6fac

Please sign in to comment.