diff --git a/Cargo.lock b/Cargo.lock index 73d7b64a..7173ce79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -164,7 +164,7 @@ dependencies = [ "emojis", "entities", "fmt2io", - "memchr", + "jetscii", "ntest", "percent-encoding-rfc3986", "shell-words", @@ -373,6 +373,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +[[package]] +name = "jetscii" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e" + [[package]] name = "lazy_static" version = "1.4.0" diff --git a/Cargo.toml b/Cargo.toml index de5caee5..c309ab9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,20 +38,20 @@ doc = false typed-arena = "2.0.2" entities = "1.0.1" unicode_categories = "0.1.1" -memchr = "2" shell-words = { version = "1.0", optional = true } -slug = "0.1.4" emojis = { version = "0.6.2", optional = true } arbitrary = { version = "1", optional = true, features = ["derive"] } bon = { version = "3", optional = true } caseless = "0.2.1" fmt2io = { version = "1.0.0", optional = true } +jetscii = "0.5.3" [dev-dependencies] ntest = "0.9" percent-encoding-rfc3986 = "0.1.3" strum = { version = "0.26.3", features = ["derive"] } toml = "0.7.3" +slug = "0.1.4" [features] default = ["cli", "syntect", "bon"] diff --git a/flake.nix b/flake.nix index 778a6cb9..877beb23 100644 --- a/flake.nix +++ b/flake.nix @@ -93,6 +93,7 @@ "clippy" "rustfmt" "rust-src" + "llvm-tools-preview" ]) ] ++ (with pkgs; [ diff --git a/src/html.rs b/src/html.rs index 1a61a3b7..ef3d207a 100644 --- a/src/html.rs +++ b/src/html.rs @@ -688,7 +688,7 @@ fn render_html_block<'a, T>( } else if !context.options.render.unsafe_ { context.write_str("")?; } else if context.options.extension.tagfilter { - tagfilter_block(literal, context)?; + tagfilter_block(context, literal)?; } else { context.write_str(literal)?; } @@ 
-1690,34 +1690,21 @@ fn tagfilter(literal: &str) -> bool { false } -fn tagfilter_block(input: &str, o: &mut dyn Write) -> fmt::Result { - let bytes = input.as_bytes(); - let size = input.len(); - let mut i = 0; - - while i < size { - let org = i; - while i < size && bytes[i] != b'<' { - i += 1; - } - - if i > org { - o.write_str(&input[org..i])?; - } - - if i >= size { - break; - } +fn tagfilter_block(output: &mut dyn Write, buffer: &str) -> fmt::Result { + let bytes = buffer.as_bytes(); + let matcher = jetscii::bytes!(b'<'); - if tagfilter(&input[i..]) { - o.write_str("&lt;")?; + let mut offset = 0; + while let Some(i) = matcher.find(&bytes[offset..]) { + output.write_str(&buffer[offset..offset + i])?; + if tagfilter(&buffer[offset + i..]) { + output.write_str("&lt;")?; } else { - o.write_str("<")?; + output.write_str("<")?; } - - i += 1; + offset += i + 1; } - + output.write_str(&buffer[offset..])?; Ok(()) } @@ -1741,22 +1728,20 @@ pub fn dangerous_url(input: &str) -> bool { /// URLs in attributes. See escape_href. pub fn escape(output: &mut dyn Write, buffer: &str) -> fmt::Result { let bytes = buffer.as_bytes(); - const HTML_UNSAFE: [bool; 256] = character_set!(b"&<>\""); + let matcher = jetscii::bytes!(b'"', b'&', b'<', b'>'); let mut offset = 0; - for (i, &byte) in bytes.iter().enumerate() { - if HTML_UNSAFE[byte as usize] { - let esc: &str = match byte { - b'"' => "&quot;", - b'&' => "&amp;", - b'<' => "&lt;", - b'>' => "&gt;", - _ => unreachable!(), - }; - output.write_str(&buffer[offset..i])?; - output.write_str(esc)?; - offset = i + 1; - } + while let Some(i) = matcher.find(&bytes[offset..]) { + let esc: &str = match bytes[offset + i] { + b'"' => "&quot;", + b'&' => "&amp;", + b'<' => "&lt;", + b'>' => "&gt;", + _ => unreachable!(), + }; + output.write_str(&buffer[offset..offset + i])?; + output.write_str(esc)?; + offset += i + 1; } output.write_str(&buffer[offset..])?; Ok(())