diff --git a/Cargo.lock b/Cargo.lock index 56c9d536..026a75ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -819,6 +819,7 @@ dependencies = [ "ignore", "itertools", "jsonpath_lib", + "lol_html", "prettydiff", "quick-xml 0.36.2", "rayon", diff --git a/crates/diff-test/Cargo.toml b/crates/diff-test/Cargo.toml index f9e9d80f..07cb4e69 100644 --- a/crates/diff-test/Cargo.toml +++ b/crates/diff-test/Cargo.toml @@ -25,3 +25,4 @@ quick-xml = "0.36" sha2 = "0.10" clap = { version = "4.5.1", features = ["derive"] } dashmap = "6" +lol_html = "2" diff --git a/crates/diff-test/src/main.rs b/crates/diff-test/src/main.rs index 6a22d009..296a7220 100644 --- a/crates/diff-test/src/main.rs +++ b/crates/diff-test/src/main.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::cmp::max; use std::collections::{BTreeMap, HashSet}; use std::fs; @@ -16,6 +17,7 @@ use ignore::types::TypesBuilder; use ignore::WalkBuilder; use itertools::Itertools; use jsonpath_lib::Compiled; +use lol_html::{element, rewrite_str, ElementContentHandlers, RewriteStrSettings, Selector}; use prettydiff::{diff_lines, diff_words}; use rayon::prelude::*; use regex::Regex; @@ -189,12 +191,27 @@ const IGNORE: &[&str] = &[ static WS_DIFF: LazyLock = LazyLock::new(|| Regex::new(r#"(?>)[\n ]+|[\n ]+(? = - LazyLock::new(|| Regex::new(r#" data-flaw-src="[^"]+""#).unwrap()); static DIFF_MAP: LazyLock>> = LazyLock::new(|| Arc::new(DashMap::new())); +/// Run html content through these handlers to clean up the html before minifying and diffing. +fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementContentHandlers<'a>)> +{ + vec![ + // remove data-flaw-src attributes + element!("*[data-flaw-src]", |el| { + el.remove_attribute("data-flaw-src"); + Ok(()) + }), + // remove ids from notecards, example-headers, code-examples + element!("div.notecard, div.example-header, div.code-example", |el| { + el.remove_attribute("id"); + Ok(()) + }), + ] +} + fn full_diff( lhs: &Value, rhs: &Value, @@ -264,8 +281,22 @@ fn full_diff( if is_html(&lhs) && is_html(&rhs) { let lhs_t = WS_DIFF.replace_all(&lhs, "$x$y"); let rhs_t = WS_DIFF.replace_all(&rhs, "$x$y"); - let lhs_t = DATA_FLAW_SRC.replace_all(&lhs_t, ""); - let rhs_t = DATA_FLAW_SRC.replace_all(&rhs_t, ""); + let lhs_t = rewrite_str( + &lhs_t, + RewriteStrSettings { + element_content_handlers: pre_diff_element_massaging_handlers(), + ..RewriteStrSettings::new() + }, + ) + .expect("lolhtml processing failed"); + let rhs_t = rewrite_str( + &rhs_t, + RewriteStrSettings { + element_content_handlers: pre_diff_element_massaging_handlers(), + ..RewriteStrSettings::new() + }, + ) + .expect("lolhtml processing failed"); lhs = fmt_html(&html_minifier::minify(lhs_t).unwrap()); rhs = fmt_html(&html_minifier::minify(rhs_t).unwrap()); } diff --git a/crates/rari-doc/src/html/rewriter.rs b/crates/rari-doc/src/html/rewriter.rs index 9604b0af..217d1dba 100644 --- a/crates/rari-doc/src/html/rewriter.rs +++ b/crates/rari-doc/src/html/rewriter.rs @@ -296,30 +296,34 @@ pub fn post_process_html( el.set_attribute("class", &class)?; Ok(()) }), - element!("pre[class*=brush]:not(.hidden)", |el| { + element!("pre[class*=brush]", |el| { let class = el.get_attribute("class"); let class = class.as_deref().unwrap_or_default(); + let is_hidden = class.split_ascii_whitespace().any(|c| c == "hidden"); let name = class .split_ascii_whitespace() .skip_while(|s| *s != "brush:") .nth(1) .unwrap_or_default(); + if !name.is_empty() && name != "plain" { - el.before( - &concat_strs!( - r#"
"#, name, "
" - ), - ContentType::Html - ); + el.prepend("", ContentType::Html); + el.append("", ContentType::Html); + } + if is_hidden { + el.before(r#"
"#, ContentType::Html); + el.after("
", ContentType::Html); + } else if !name.is_empty() && name != "plain" { + el.before(&concat_strs!( + r#"
"#, + name, + "
"), + ContentType::Html + ); el.after("
", ContentType::Html); } Ok(()) }), - element!("pre[class*=brush].hidden", |el| { - el.before(r#"
"#, ContentType::Html); - el.after("
", ContentType::Html); - Ok(()) - }), element!( "div.notecard.warning[data-add-warning] > p:first-child", |el| { diff --git a/crates/rari-md/src/html.rs b/crates/rari-md/src/html.rs index 584c02c9..ba2705c1 100644 --- a/crates/rari-md/src/html.rs +++ b/crates/rari-md/src/html.rs @@ -727,7 +727,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { match self.plugins.render.codefence_syntax_highlighter { None => { pre_attributes.extend(code_attributes); - let with_code = if let Some(cls) = pre_attributes.get_mut("class") { + let _with_code = if let Some(cls) = pre_attributes.get_mut("class") + { if !ncb.info.is_empty() { let langs = ncb .info @@ -746,17 +747,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { false }; write_opening_tag(self.output, "pre", pre_attributes)?; - if with_code { - self.output.write_all(b"")?; - } - self.escape(literal)?; - - if with_code { - self.output.write_all(b"")?; - } else { - self.output.write_all(b"\n")? - } + self.output.write_all(b"\n")? } Some(highlighter) => { highlighter.write_pre_tag(self.output, pre_attributes)?;