Skip to content

Commit

Permalink
fix(html): unify code tags in pre
Browse files Browse the repository at this point in the history
* add html preprocessing with lol_html

* add a <code> wrapper inside <pre class="brush: ..."> at the rewriter level when a brush language other than "plain" is set.

* typo
  • Loading branch information
argl authored Oct 29, 2024
1 parent e6771dc commit d66b941
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 27 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/diff-test/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ quick-xml = "0.36"
sha2 = "0.10"
clap = { version = "4.5.1", features = ["derive"] }
dashmap = "6"
lol_html = "2"
39 changes: 35 additions & 4 deletions crates/diff-test/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::cmp::max;
use std::collections::{BTreeMap, HashSet};
use std::fs;
Expand All @@ -16,6 +17,7 @@ use ignore::types::TypesBuilder;
use ignore::WalkBuilder;
use itertools::Itertools;
use jsonpath_lib::Compiled;
use lol_html::{element, rewrite_str, ElementContentHandlers, RewriteStrSettings, Selector};
use prettydiff::{diff_lines, diff_words};
use rayon::prelude::*;
use regex::Regex;
Expand Down Expand Up @@ -189,12 +191,27 @@ const IGNORE: &[&str] = &[

/// Matches a run of newlines/spaces that either directly follows a `>` (captured as `x`)
/// or directly precedes a `</` (captured as `y`).
///
/// Only `\n` and the space character are part of the run; tabs and other whitespace are
/// not matched. Replacing a match with `"$x$y"` keeps the captured bracket and drops the
/// whitespace run (the group that did not participate expands to nothing).
static WS_DIFF: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"(?<x>>)[\n ]+|[\n ]+(?<y></)"#).unwrap());
/// Matches a ` data-flaw-src="…"` attribute including its leading space.
///
/// The value must contain at least one character (`[^"]+`), so an empty
/// `data-flaw-src=""` is not matched.
static DATA_FLAW_SRC: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#" data-flaw-src="[^"]+""#).unwrap());

/// Lazily created, `Arc`-shared concurrent map from `String` keys to `String` values.
///
/// NOTE(review): no reads or writes are visible in this excerpt — presumably it caches
/// computed diffs; confirm against the call sites.
static DIFF_MAP: LazyLock<Arc<DashMap<String, String>>> =
LazyLock::new(|| Arc::new(DashMap::new()));

/// Builds the element handlers used to normalise HTML before it is minified and diffed.
///
/// The returned list holds two handlers, in this order:
/// 1. every element carrying a `data-flaw-src` attribute has that attribute removed;
/// 2. `div.notecard`, `div.example-header` and `div.code-example` elements have their
///    `id` attribute removed.
///
/// A fresh list is built on every call.
fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementContentHandlers<'a>)>
{
    // Drop `data-flaw-src` wherever it appears.
    let strip_flaw_src = element!("*[data-flaw-src]", |node| {
        node.remove_attribute("data-flaw-src");
        Ok(())
    });

    // Drop the `id` of notecards, example headers and code examples.
    let strip_ids = element!(
        "div.notecard, div.example-header, div.code-example",
        |node| {
            node.remove_attribute("id");
            Ok(())
        }
    );

    vec![strip_flaw_src, strip_ids]
}

fn full_diff(
lhs: &Value,
rhs: &Value,
Expand Down Expand Up @@ -264,8 +281,22 @@ fn full_diff(
if is_html(&lhs) && is_html(&rhs) {
let lhs_t = WS_DIFF.replace_all(&lhs, "$x$y");
let rhs_t = WS_DIFF.replace_all(&rhs, "$x$y");
let lhs_t = DATA_FLAW_SRC.replace_all(&lhs_t, "");
let rhs_t = DATA_FLAW_SRC.replace_all(&rhs_t, "");
let lhs_t = rewrite_str(
&lhs_t,
RewriteStrSettings {
element_content_handlers: pre_diff_element_massaging_handlers(),
..RewriteStrSettings::new()
},
)
.expect("lolhtml processing failed");
let rhs_t = rewrite_str(
&rhs_t,
RewriteStrSettings {
element_content_handlers: pre_diff_element_massaging_handlers(),
..RewriteStrSettings::new()
},
)
.expect("lolhtml processing failed");
lhs = fmt_html(&html_minifier::minify(lhs_t).unwrap());
rhs = fmt_html(&html_minifier::minify(rhs_t).unwrap());
}
Expand Down
28 changes: 16 additions & 12 deletions crates/rari-doc/src/html/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,30 +296,34 @@ pub fn post_process_html<T: PageLike>(
el.set_attribute("class", &class)?;
Ok(())
}),
element!("pre[class*=brush]:not(.hidden)", |el| {
element!("pre[class*=brush]", |el| {
let class = el.get_attribute("class");
let class = class.as_deref().unwrap_or_default();
let is_hidden = class.split_ascii_whitespace().any(|c| c == "hidden");
let name = class
.split_ascii_whitespace()
.skip_while(|s| *s != "brush:")
.nth(1)
.unwrap_or_default();

if !name.is_empty() && name != "plain" {
el.before(
&concat_strs!(
r#"<div class="code-example"><div class='example-header'><span class="language-name">"#, name, "</span></div>"
),
ContentType::Html
);
el.prepend("<code>", ContentType::Html);
el.append("</code>", ContentType::Html);
}
if is_hidden {
el.before(r#"<div class="code-example">"#, ContentType::Html);
el.after("</div>", ContentType::Html);
} else if !name.is_empty() && name != "plain" {
el.before(&concat_strs!(
r#"<div class="code-example"><div class='example-header'><span class="language-name">"#,
name,
"</span></div>"),
ContentType::Html
);
el.after("</div>", ContentType::Html);
}
Ok(())
}),
element!("pre[class*=brush].hidden", |el| {
el.before(r#"<div class="code-example">"#, ContentType::Html);
el.after("</div>", ContentType::Html);
Ok(())
}),
element!(
"div.notecard.warning[data-add-warning] > p:first-child",
|el| {
Expand Down
14 changes: 3 additions & 11 deletions crates/rari-md/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
match self.plugins.render.codefence_syntax_highlighter {
None => {
pre_attributes.extend(code_attributes);
let with_code = if let Some(cls) = pre_attributes.get_mut("class") {
let _with_code = if let Some(cls) = pre_attributes.get_mut("class")
{
if !ncb.info.is_empty() {
let langs = ncb
.info
Expand All @@ -746,17 +747,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
false
};
write_opening_tag(self.output, "pre", pre_attributes)?;
if with_code {
self.output.write_all(b"<code>")?;
}

self.escape(literal)?;

if with_code {
self.output.write_all(b"</code></pre>")?;
} else {
self.output.write_all(b"</pre>\n")?
}
self.output.write_all(b"</pre>\n")?
}
Some(highlighter) => {
highlighter.write_pre_tag(self.output, pre_attributes)?;
Expand Down

0 comments on commit d66b941

Please sign in to comment.