diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs
index a6cb3beca6..4d30b2ac92 100644
--- a/components/markdown/src/markdown.rs
+++ b/components/markdown/src/markdown.rs
@@ -10,7 +10,7 @@ use utils::net::is_external_link;
 use crate::context::RenderContext;
 use errors::{Context, Error, Result};
 use libs::pulldown_cmark::escape::escape_html;
-use libs::regex::Regex;
+use libs::regex::{Regex, RegexBuilder};
 use utils::site::resolve_internal_link;
 use utils::slugs::slugify_anchors;
 use utils::table_of_contents::{make_table_of_contents, Heading};
@@ -24,6 +24,15 @@ const CONTINUE_READING: &str = "<span id=\"continue-reading\"></span>";
 const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
 static EMOJI_REPLACER: Lazy<EmojiReplacer> = Lazy::new(EmojiReplacer::new);
 
+/// Matches the summary divider (`<!-- more -->`) as a regex so that whitespace around `more` and letter case don't matter.
+static MORE_DIVIDER_RE: Lazy<Regex> = Lazy::new(|| {
+    RegexBuilder::new(r#"<!--\s*more\s*-->"#)
+        .case_insensitive(true)
+        .dot_matches_new_line(true)
+        .build()
+        .unwrap()
+});
+
 /// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
 /// private schemes. This regex checks if the given string starts with something that just looks
 /// like a scheme, i.e., a case-insensitive identifier followed by a colon.
@@ -485,7 +494,7 @@ pub fn markdown_to_html(
                         });
                     }
                     Event::Html(text) => {
-                        if text.contains("<!-- more -->") {
+                        if !has_summary && MORE_DIVIDER_RE.is_match(&text) {
                             has_summary = true;
                             events.push(Event::Html(CONTINUE_READING.into()));
                             continue;
@@ -600,6 +609,8 @@ pub fn markdown_to_html(
 
 #[cfg(test)]
 mod tests {
+    use config::Config;
+
     use super::*;
 
     #[test]
@@ -644,4 +655,31 @@ mod tests {
             assert!(!is_colocated_asset_link(link));
         }
     }
+
+    #[test]
+    // Tests for summary being split out
+    fn test_summary_split() {
+        let top = "Here's a compelling summary.";
+        let top_rendered = format!("<p>{top}</p>");
+        let bottom = "Here's the compelling conclusion.";
+        let bottom_rendered = format!("<p>{bottom}</p>");
+        // FIXME: would add a test that includes newlines, but due to the way pulldown-cmark parses HTML nodes, these are passed as separate HTML events. see: https://github.com/raphlinus/pulldown-cmark/issues/803
+        let mores =
+            ["<!-- more -->", "<!--more-->", "<!-- MORE -->", "<!--MORE-->", "<!--\t MoRe \t-->"];
+        let config = Config::default();
+        let context = RenderContext::from_config(&config);
+        for more in mores {
+            let content = format!("{top}\n\n{more}\n\n{bottom}");
+            let rendered = markdown_to_html(&content, &context, vec![]).unwrap();
+            assert!(rendered.summary_len.is_some(), "no summary when splitting on {more}");
+            let summary_len = rendered.summary_len.unwrap();
+            let summary = &rendered.body[..summary_len].trim();
+            let body = &rendered.body[summary_len..].trim();
+            let continue_reading = &body[..CONTINUE_READING.len()];
+            let body = &body[CONTINUE_READING.len()..].trim();
+            assert_eq!(summary, &top_rendered);
+            assert_eq!(continue_reading, CONTINUE_READING);
+            assert_eq!(body, &bottom_rendered);
+        }
+    }
 }