From d66b94114fa89b19c4b708643b061595e1f1ffec Mon Sep 17 00:00:00 2001
From: Andi Pieper
Date: Tue, 29 Oct 2024 20:40:49 +0100
Subject: [PATCH] fix(html): unify code tags in pre

* add html preprocessing with lol_html

* added `<code>` block to `<pre>` in certain circumstances on the rewriter level.

* typo
---
 Cargo.lock                           |  1 +
 crates/diff-test/Cargo.toml          |  1 +
 crates/diff-test/src/main.rs         | 39 +++++++++++++++++++++++++---
 crates/rari-doc/src/html/rewriter.rs | 28 +++++++++++---------
 crates/rari-md/src/html.rs           | 14 +++-------
 5 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 56c9d536..026a75ae 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -819,6 +819,7 @@ dependencies = [
  "ignore",
  "itertools",
  "jsonpath_lib",
+ "lol_html",
  "prettydiff",
  "quick-xml 0.36.2",
  "rayon",
diff --git a/crates/diff-test/Cargo.toml b/crates/diff-test/Cargo.toml
index f9e9d80f..07cb4e69 100644
--- a/crates/diff-test/Cargo.toml
+++ b/crates/diff-test/Cargo.toml
@@ -25,3 +25,4 @@ quick-xml = "0.36"
 sha2 = "0.10"
 clap = { version = "4.5.1", features = ["derive"] }
 dashmap = "6"
+lol_html = "2"
diff --git a/crates/diff-test/src/main.rs b/crates/diff-test/src/main.rs
index 6a22d009..296a7220 100644
--- a/crates/diff-test/src/main.rs
+++ b/crates/diff-test/src/main.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::cmp::max;
 use std::collections::{BTreeMap, HashSet};
 use std::fs;
@@ -16,6 +17,7 @@ use ignore::types::TypesBuilder;
 use ignore::WalkBuilder;
 use itertools::Itertools;
 use jsonpath_lib::Compiled;
+use lol_html::{element, rewrite_str, ElementContentHandlers, RewriteStrSettings, Selector};
 use prettydiff::{diff_lines, diff_words};
 use rayon::prelude::*;
 use regex::Regex;
@@ -189,12 +191,27 @@ const IGNORE: &[&str] = &[
 
 static WS_DIFF: LazyLock =
     LazyLock::new(|| Regex::new(r#"(?>)[\n ]+|[\n ]+(? =
-    LazyLock::new(|| Regex::new(r#" data-flaw-src="[^"]+""#).unwrap());
 
 static DIFF_MAP: LazyLock>> =
     LazyLock::new(|| Arc::new(DashMap::new()));
 
+/// Run html content through these handlers to clean up the html before minifying and diffing.
+fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementContentHandlers<'a>)>
+{
+    vec![
+        // remove data-flaw-src attributes
+        element!("*[data-flaw-src]", |el| {
+            el.remove_attribute("data-flaw-src");
+            Ok(())
+        }),
+        // remove ids from notecards, example-headers, code-examples
+        element!("div.notecard, div.example-header, div.code-example", |el| {
+            el.remove_attribute("id");
+            Ok(())
+        }),
+    ]
+}
+
 fn full_diff(
     lhs: &Value,
     rhs: &Value,
@@ -264,8 +281,22 @@ fn full_diff(
                 if is_html(&lhs) && is_html(&rhs) {
                     let lhs_t = WS_DIFF.replace_all(&lhs, "$x$y");
                     let rhs_t = WS_DIFF.replace_all(&rhs, "$x$y");
-                    let lhs_t = DATA_FLAW_SRC.replace_all(&lhs_t, "");
-                    let rhs_t = DATA_FLAW_SRC.replace_all(&rhs_t, "");
+                    let lhs_t = rewrite_str(
+                        &lhs_t,
+                        RewriteStrSettings {
+                            element_content_handlers: pre_diff_element_massaging_handlers(),
+                            ..RewriteStrSettings::new()
+                        },
+                    )
+                    .expect("lolhtml processing failed");
+                    let rhs_t = rewrite_str(
+                        &rhs_t,
+                        RewriteStrSettings {
+                            element_content_handlers: pre_diff_element_massaging_handlers(),
+                            ..RewriteStrSettings::new()
+                        },
+                    )
+                    .expect("lolhtml processing failed");
                     lhs = fmt_html(&html_minifier::minify(lhs_t).unwrap());
                     rhs = fmt_html(&html_minifier::minify(rhs_t).unwrap());
                 }
diff --git a/crates/rari-doc/src/html/rewriter.rs b/crates/rari-doc/src/html/rewriter.rs
index 9604b0af..217d1dba 100644
--- a/crates/rari-doc/src/html/rewriter.rs
+++ b/crates/rari-doc/src/html/rewriter.rs
@@ -296,30 +296,34 @@ pub fn post_process_html(
             el.set_attribute("class", &class)?;
             Ok(())
         }),
-        element!("pre[class*=brush]:not(.hidden)", |el| {
+        element!("pre[class*=brush]", |el| {
             let class = el.get_attribute("class");
             let class = class.as_deref().unwrap_or_default();
+            let is_hidden = class.split_ascii_whitespace().any(|c| c == "hidden");
             let name = class
                 .split_ascii_whitespace()
                 .skip_while(|s| *s != "brush:")
                 .nth(1)
                 .unwrap_or_default();
+
             if !name.is_empty() && name != "plain" {
-                el.before(
-              &concat_strs!(
-                r#"
"#, name, "
" - ), - ContentType::Html - ); + el.prepend("", ContentType::Html); + el.append("", ContentType::Html); + } + if is_hidden { + el.before(r#"
"#, ContentType::Html); + el.after("
", ContentType::Html); + } else if !name.is_empty() && name != "plain" { + el.before(&concat_strs!( + r#"
"#, + name, + "
"), + ContentType::Html + ); el.after("
", ContentType::Html); } Ok(()) }), - element!("pre[class*=brush].hidden", |el| { - el.before(r#"
"#, ContentType::Html); - el.after("
", ContentType::Html); - Ok(()) - }), element!( "div.notecard.warning[data-add-warning] > p:first-child", |el| { diff --git a/crates/rari-md/src/html.rs b/crates/rari-md/src/html.rs index 584c02c9..ba2705c1 100644 --- a/crates/rari-md/src/html.rs +++ b/crates/rari-md/src/html.rs @@ -727,7 +727,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { match self.plugins.render.codefence_syntax_highlighter { None => { pre_attributes.extend(code_attributes); - let with_code = if let Some(cls) = pre_attributes.get_mut("class") { + let _with_code = if let Some(cls) = pre_attributes.get_mut("class") + { if !ncb.info.is_empty() { let langs = ncb .info @@ -746,17 +747,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { false }; write_opening_tag(self.output, "pre", pre_attributes)?; - if with_code { - self.output.write_all(b"")?; - } - self.escape(literal)?; - - if with_code { - self.output.write_all(b"
")?; - } else { - self.output.write_all(b"\n")? - } + self.output.write_all(b"\n")? } Some(highlighter) => { highlighter.write_pre_tag(self.output, pre_attributes)?;