diff --git a/askama_escape/Cargo.toml b/askama_escape/Cargo.toml
index ee84891fa..859b903b5 100644
--- a/askama_escape/Cargo.toml
+++ b/askama_escape/Cargo.toml
@@ -15,6 +15,12 @@ appveyor = { repository = "djc/askama" }
maintenance = { status = "actively-developed" }
travis-ci = { repository = "djc/askama" }
+[dependencies]
+cfg-if ="0.1.6"
+
+[build-dependencies]
+version_check = "0.1.4"
+
[dev-dependencies]
criterion = "0.2"
diff --git a/askama_escape/benches/all.rs b/askama_escape/benches/all.rs
index e7dc7ed90..3dec4f4e7 100644
--- a/askama_escape/benches/all.rs
+++ b/askama_escape/benches/all.rs
@@ -2,77 +2,77 @@ extern crate askama_escape;
#[macro_use]
extern crate criterion;
-use askama_escape::MarkupDisplay;
+use askama_escape::escape;
use criterion::Criterion;
criterion_main!(benches);
criterion_group!(benches, functions);
fn functions(c: &mut Criterion) {
- c.bench_function("Escaping", escaping);
+ c.bench_function("toString 1 bytes", format_short); // baseline: plain `to_string`, `escape` is not called
+ c.bench_function("No Escaping 1 bytes", no_escaping_short); // `escape` on input with nothing to escape
+ c.bench_function("Escaping 1 bytes", escaping_short); // `escape` on input containing `<`
+ c.bench_function("toString 10 bytes", format);
+ c.bench_function("No Escaping 10 bytes", no_escaping);
+ c.bench_function("Escaping 10 bytes", escaping);
+ c.bench_function("toString 5 MB", format_long);
+ c.bench_function("No Escaping 5 MB", no_escaping_long);
+ c.bench_function("Escaping 5 MB", escaping_long);
}
-fn escaping(b: &mut criterion::Bencher) {
- let string_long = r#"
- Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris consequat tellus sit
- amet ornare fermentum. Etiam nec erat ante. In at metus a orci mollis scelerisque.
- Sed eget ultrices turpis, at sollicitudin erat. Integer hendrerit nec magna quis
- venenatis. Vivamus non dolor hendrerit, vulputate velit sed, varius nunc. Quisque
- in pharetra mi. Sed ullamcorper nibh malesuada commodo porttitor. Ut scelerisque
- sodales felis quis dignissim. Morbi aliquam finibus justo, sit amet consectetur
- mauris efficitur sit amet. Donec posuere turpis felis, eu lacinia magna accumsan
- quis. Fusce egestas lacus vel fermentum tincidunt. Phasellus a nulla eget lectus
- placerat commodo at eget nisl. Fusce cursus dui quis purus accumsan auctor.
- Donec iaculis felis quis metus consectetur porttitor.
-
- Etiam nibh mi, accumsan quis purus sed, posuere fermentum lorem. In pulvinar porta
- maximus. Fusce tincidunt lacinia tellus sit amet tincidunt. Aliquam lacus est, pulvinar
- non metus a, facilisis ultrices quam. Nulla feugiat leo in cursus eleifend. Suspendisse
- eget nisi ac justo sagittis interdum id a ipsum. Nulla mauris justo, scelerisque ac
- rutrum vitae, consequat vel ex.
-
-
- Sed sollicitudin sem mauris, at rutrum nibh egestas vel. Ut eu nisi tellus. Praesent dignissim
- orci elementum, mattis turpis eget, maximus ante. Suspendisse luctus eu felis a tempor. Morbi
- ac risus vitae sem molestie ullamcorper. Curabitur ligula augue, sollicitudin quis maximus vel,
- facilisis sed nibh. Aenean auctor magna sem, id rutrum metus convallis quis. Nullam non arcu
- dictum, lobortis erat quis, rhoncus est. Suspendisse venenatis, mi sed venenatis vehicula,
- tortor dolor egestas lectus, et efficitur turpis odio non augue. Integer velit sapien, dictum
- non egestas vitae, hendrerit sed quam. Phasellus a nunc eu erat varius imperdiet. Etiam id
- sollicitudin turpis, vitae molestie orci. Quisque ornare magna quis metus rhoncus commodo.
- Phasellus non mauris velit.
-
-
- Etiam dictum tellus ipsum, nec varius quam ornare vel. Cras vehicula diam nec sollicitudin
- ultricies. Pellentesque rhoncus sagittis nisl id facilisis. Nunc viverra convallis risus ut
- luctus. Aliquam vestibulum efficitur massa, id tempus nisi posuere a. Aliquam scelerisque
- elit justo. Nullam a ante felis. Cras vitae lorem eu nisi feugiat hendrerit. Maecenas vitae
- suscipit leo, lacinia dignissim lacus. Sed eget volutpat mi. In eu bibendum neque. Pellentesque
- finibus velit a fermentum rhoncus. Maecenas leo purus, eleifend eu lacus a, condimentum sagittis
- justo.
-
"#;
- let string_short = "Lorem ipsum dolor sit amet,bar&foo\"bar\\foo/bar";
- let empty = "";
- let no_escape = "Lorem ipsum dolor sit amet,";
- let no_escape_long = r#"
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin scelerisque eu urna in aliquet.
-Phasellus ac nulla a urna sagittis consequat id quis est. Nullam eu ex eget erat accumsan dictum
-ac lobortis urna. Etiam fermentum ut quam at dignissim. Curabitur vestibulum luctus tellus, sit
-amet lobortis augue tempor faucibus. Nullam sed felis eget odio elementum euismod in sit amet massa.
-Vestibulum sagittis purus sit amet eros auctor, sit amet pharetra purus dapibus. Donec ornare metus
-vel dictum porta. Etiam ut nisl nisi. Nullam rutrum porttitor mi. Donec aliquam ac ipsum eget
-hendrerit. Cras faucibus, eros ut pharetra imperdiet, est tellus aliquet felis, eget convallis
-lacus ipsum eget quam. Vivamus orci lorem, maximus ac mi eget, bibendum vulputate massa. In
-vestibulum dui hendrerit, vestibulum lacus sit amet, posuere erat. Vivamus euismod massa diam,
-vulputate euismod lectus vestibulum nec. Donec sit amet massa magna. Nunc ipsum nulla, euismod
-quis lacus at, gravida maximus elit. Duis tristique, nisl nullam.
- "#;
+static A: &str = "a";
+static E: &str = "<";
+
+fn escaping_short(b: &mut criterion::Bencher) {
+ b.iter(|| escape(E).to_string()); // single-byte input, and that byte (`<`) has to be escaped
+}
+// Single-byte input that needs no escaping.
+// The `String` is returned from the closure, like in every other benchmark of this file,
+// so that criterion receives the result instead of it being dropped inside the closure.
+fn no_escaping_short(b: &mut criterion::Bencher) {
b.iter(|| {
- format!("{}", MarkupDisplay::from(string_long));
- format!("{}", MarkupDisplay::from(string_short));
- format!("{}", MarkupDisplay::from(empty));
- format!("{}", MarkupDisplay::from(no_escape));
- format!("{}", MarkupDisplay::from(no_escape_long));
+ escape(A).to_string()
});
}
+
+fn format_short(b: &mut criterion::Bencher) {
+ b.iter(|| A.to_string()); // baseline: copies the single byte without calling `escape`
+}
+
+fn escaping(b: &mut criterion::Bencher) {
+ // 10 bytes at 10% escape: nine plain bytes plus a single `<`, i.e. the same input
+ // length as the one used by `no_escaping` and `format`.
+ let string: &str = &[A, A, A, A, A, E, A, A, A, A].join("");
+
+ b.iter(|| escape(string).to_string());
+}
+
+fn no_escaping(b: &mut criterion::Bencher) {
+ let no_escape: &str = &A.repeat(10); // ten bytes, none of which has to be escaped
+
+ b.iter(|| escape(no_escape).to_string());
+}
+
+fn format(b: &mut criterion::Bencher) {
+ let string: &str = &A.repeat(10); // ten bytes, copied below without calling `escape`
+
+ b.iter(|| string.to_string());
+}
+
+fn escaping_long(b: &mut criterion::Bencher) {
+ // 5 MiB at 3.125% escape: a 32-byte unit holding one `<` (1/32), repeated 160 * 1024 times
+ let string: &str = &[&A.repeat(15), E, &A.repeat(16)]
+ .join("")
+ .repeat(160 * 1024);
+
+ b.iter(|| escape(string).to_string());
+}
+
+fn no_escaping_long(b: &mut criterion::Bencher) {
+ let no_escape: &str = &A.repeat(5 * 1024 * 1024); // 5 MiB, none of which has to be escaped
+
+ b.iter(|| escape(no_escape).to_string());
+}
+
+fn format_long(b: &mut criterion::Bencher) {
+ let string: &str = &A.repeat(5 * 1024 * 1024); // 5 MiB, copied below without calling `escape`
+
+ b.iter(|| string.to_string());
+}
diff --git a/askama_escape/build.rs b/askama_escape/build.rs
new file mode 100644
index 000000000..f1dae3a6d
--- /dev/null
+++ b/askama_escape/build.rs
@@ -0,0 +1,29 @@
+extern crate version_check;
+
+use std::env;
+
+use version_check::is_min_version;
+
+fn main() { // build-script entry point: its only job is deciding which SIMD cfg flags to emit
+ enable_simd_optimizations();
+}
+
+/// Prints the `cargo:rustc-cfg` lines that switch on the runtime-dispatched SIMD code.
+///
+/// Nothing is printed when `CARGO_CFG_ASKAMA_DISABLE_AUTO_SIMD` is present in the
+/// environment, or when the compiler is not known to be at least 1.27.0.
+fn enable_simd_optimizations() {
+ // The environment is consulted first; the compiler version is only probed when the
+ // opt-out variable is absent.
+ let skip = is_env_set("CARGO_CFG_ASKAMA_DISABLE_AUTO_SIMD")
+ || !is_min_version("1.27.0").map_or(false, |(new_enough, _)| new_enough);
+ if skip {
+ return;
+ }
+
+ for flag in &["askama_runtime_simd", "askama_runtime_avx", "askama_runtime_sse"] {
+ println!("cargo:rustc-cfg={}", flag);
+ }
+}
+
+/// Returns `true` when the environment variable `name` is present, whatever its value.
+///
+/// `var_os` is used rather than `var` so that a variable holding a value that is not
+/// valid Unicode still counts as set.
+fn is_env_set(name: &str) -> bool {
+ env::var_os(name).is_some()
+}
diff --git a/askama_escape/src/lib.rs b/askama_escape/src/lib.rs
index b967f1f02..075097beb 100644
--- a/askama_escape/src/lib.rs
+++ b/askama_escape/src/lib.rs
@@ -1,3 +1,6 @@
+#[macro_use]
+extern crate cfg_if;
+
use std::fmt::{self, Display, Formatter};
use std::str;
@@ -49,52 +52,486 @@ pub fn escape(s: &str) -> Escaped {
}
}
-macro_rules! escaping_body {
- ($start:ident, $i:ident, $fmt:ident, $_self:ident, $quote:expr) => {{
+pub struct Escaped<'a> { // borrowed input; the escaping itself is done by the `Display` impl
+ bytes: &'a [u8],
+}
+
+impl<'a> Display for Escaped<'a> {
+ fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
+ _imp(self.bytes, fmt) // `_imp` is whichever variant the `cfg_if!` block in this file compiled in
+ }
+}
+
+// Selects the implementation behind `_imp`:
+// on x86_64 (non-Windows) builds with `askama_runtime_simd` set, a function pointer that is
+// resolved by CPU feature detection on the first call; everywhere else, the scalar `_escape`.
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", not(target_os = "windows"), askama_runtime_simd))] {
+
+ use std::arch::x86_64::*;
+ use std::mem::{self, size_of};
+ use std::sync::atomic::{AtomicUsize, Ordering};
+
+ #[inline(always)]
+ fn _imp(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29
+ // `FN` initially points at `detect`; the first call replaces it with the chosen
+ // implementation, so later calls jump straight to that implementation.
+ static mut FN: fn(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result = detect;
+
+ fn detect(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ let fun = if cfg!(askama_runtime_avx) && is_x86_feature_detected!("avx2") {
+ _avx_escape as usize
+ } else if cfg!(askama_runtime_sse) && is_x86_feature_detected!("sse4.2") {
+ _sse_escape as usize
+ } else {
+ _escape as usize
+ };
+
+ // The function pointer is stored and loaded through an `AtomicUsize` view of `FN`.
+ let slot = unsafe { &*(&FN as *const _ as *const AtomicUsize) };
+ slot.store(fun as usize, Ordering::Relaxed);
+ unsafe {
+ mem::transmute::<usize, fn(&[u8], &mut Formatter) -> fmt::Result>(fun)(bytes, fmt)
+ }
+ }
+
+ unsafe {
+ let slot = &*(&FN as *const _ as * const AtomicUsize);
+ let fun = slot.load(Ordering::Relaxed);
+ mem::transmute::<usize, fn(&[u8], &mut Formatter) -> fmt::Result>(fun)(bytes, fmt)
+ }
+ }
+
+ // Subtract `b` from `a` and return the difference. `a` should be greater than
+ // or equal to `b`.
+ #[inline(always)]
+ fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(b <= a);
+ (a as usize) - (b as usize)
+ }
+ } else {
+
+ #[inline(always)]
+ fn _imp(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ _escape(bytes, fmt)
+ }
+ }
+}
+
+// Core step of the escape loop, expanded for a byte at index `$i` that has to be escaped:
+// if `$start` is before `$i`,
+// write the pending slice `$start..$i` unchanged,
+// then write the replacement text `$quote` and
+// move `$start` to the position right after `$i`
+macro_rules! escape_body {
+ ($i:expr, $start:ident, $fmt:ident, $bytes:ident, $quote:expr) => {{
if $start < $i {
- $fmt.write_str(unsafe { str::from_utf8_unchecked(&$_self.bytes[$start..$i]) })?;
+ #[allow(unused_unsafe)]
+ $fmt.write_str(unsafe { str::from_utf8_unchecked(&$bytes[$start..$i]) })?;
}
$fmt.write_str($quote)?;
$start = $i + 1;
}};
}
-pub struct Escaped<'a> {
- bytes: &'a [u8],
+// Select between pairs bytes - quote to call the callback
+macro_rules! bodies {
+ ($i:expr, $b:expr, $start:ident, $fmt:ident, $bytes:ident, $callback:ident) => {
+ match $b {
+ b'<' => $callback!($i, $start, $fmt, $bytes, "<"),
+ b'>' => $callback!($i, $start, $fmt, $bytes, ">"),
+ b'&' => $callback!($i, $start, $fmt, $bytes, "&"),
+ b'"' => $callback!($i, $start, $fmt, $bytes, """),
+ b'\'' => $callback!($i, $start, $fmt, $bytes, "'"),
+ b'/' => $callback!($i, $start, $fmt, $bytes, "/"),
+ _ => (),
+ }
+ };
+}
+
+// Callback handed to `bodies!` by the mask-based code:
+// evaluates the index expression `$i` once into a local, then runs `escape_body!` with it
+#[allow(unused_macros)]
+macro_rules! mask_body {
+ ($i:expr, $start:ident, $fmt:ident, $bytes:ident, $quote:expr) => {{
+ let i = $i;
+ escape_body!(i, $start, $fmt, $bytes, $quote);
+ }};
+}
+
+// Handles the byte at `$ptr + $cur` (index `$at + $cur`), then advances `$cur` to the next set bit of `$mask`
+#[allow(unused_macros)]
+macro_rules! mask_bodies {
+ ($mask:ident, $at:ident, $cur:ident, $ptr:ident, $start:ident, $fmt:ident, $bytes:ident) => {
+ bodies!($at + $cur, *$ptr.add($cur), $start, $fmt, $bytes, mask_body);
+
+ $mask ^= 1 << $cur; // clear the bit that was just handled
+ if $mask == 0 {
+ break; // exits the loop this macro is expanded in
+ }
+
+ $cur = $mask.trailing_zeros() as usize;
+ };
+}
+
+// Handles every byte flagged in `$mask`, lowest bit first.
+// `at` is the offset of `$ptr` from `$start_ptr`; `cur` is the bit currently being handled.
+// The loop is left through the `break` inside `mask_bodies!`, once `$mask` reaches 0
+#[allow(unused_macros)]
+macro_rules! write_mask {
+ ($mask:ident, $ptr:ident, $start_ptr:ident, $start:ident, $fmt:ident, $bytes:ident) => {{
+ let at = sub($ptr, $start_ptr);
+ let mut cur = $mask.trailing_zeros() as usize;
+
+ loop {
+ mask_bodies!($mask, at, cur, $ptr, $start, $fmt, $bytes);
+ }
+
+ debug_assert_eq!(at, sub($ptr, $start_ptr))
+ }};
+}
+
+/// Scalar HTML escape: walks `bytes` one at a time and writes the escaped text to `fmt`.
+fn _escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ // Index of the first byte that has not been written to `fmt` yet.
+ let mut start = 0;
+
+ for (i, &byte) in bytes.iter().enumerate() {
+ // Range pre-filter: bytes outside `FLAG_BELOW..=FLAG_BELOW + LEN` are skipped at once.
+ if byte.wrapping_sub(FLAG_BELOW) > LEN {
+ continue;
+ }
+ bodies!(i, byte, start, fmt, bytes, escape_body);
+ }
+
+ // Whatever follows the last escaped byte is written unchanged.
+ fmt.write_str(unsafe { str::from_utf8_unchecked(&bytes[start..]) })
+}
+
+/// AVX2 HTML escape: scans `bytes` 32 bytes at a time (128 in the main loop) for candidate
+/// bytes and writes the escaped text to `fmt`.
+///
+/// The vector compare is only a pre-filter; `bodies!` decides byte by byte what is escaped.
+///
+/// # Safety
+///
+/// The caller must make sure the CPU supports AVX2.
+#[cfg(all(
+ target_arch = "x86_64",
+ not(target_os = "windows"),
+ askama_runtime_simd,
+ askama_runtime_avx
+))]
+#[target_feature(enable = "avx2")]
+unsafe fn _avx_escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ const VECTOR_SIZE: usize = size_of::<__m256i>();
+ const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+ const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+ let v_flag = _mm256_set1_epi8((LEN + 1) as i8);
+ let v_flag_below = _mm256_set1_epi8(FLAG_BELOW as i8);
+
+ let len = bytes.len();
+ let start_ptr = bytes.as_ptr();
+ let mut ptr = start_ptr;
+ let mut start = 0;
+
+ // Write a sliced mask: only bits below `$align` are handled
+ macro_rules! write_forward {
+ ($mask: ident, $align:ident) => {{
+ if $mask != 0 {
+ let at = sub(ptr, start_ptr);
+ let mut cur = $mask.trailing_zeros() as usize;
+
+ while cur < $align {
+ mask_bodies!($mask, at, cur, ptr, start, fmt, bytes);
+ }
+
+ debug_assert_eq!(at, sub(ptr, start_ptr))
+ }
+ }};
+ }
+
+ if len < VECTOR_SIZE {
+ // NOTE(review): this loads a full vector although the slice is shorter than one;
+ // only the first `len` mask bits are used, but the load itself reads past the slice.
+ let a = _mm256_loadu_si256(ptr as *const __m256i);
+ let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below));
+ let mut mask = _mm256_movemask_epi8(cmp);
+
+ write_forward!(mask, len);
+ } else {
+ let end_ptr = bytes[len..].as_ptr();
+
+ {
+ let align = (VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)) & VECTOR_ALIGN;
+ if 0 < align {
+ let a = _mm256_loadu_si256(ptr as *const __m256i);
+ let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below));
+ let mut mask = _mm256_movemask_epi8(cmp);
+
+ write_forward!(mask, align);
+ ptr = ptr.add(align);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
+ }
+ }
+
+ debug_assert!(start_ptr <= ptr && start_ptr <= end_ptr.sub(VECTOR_SIZE));
+
+ if LOOP_SIZE <= len {
+ // Main loop 128 bytes, need aligned ptr at VECTOR_SIZE
+ while ptr <= end_ptr.sub(LOOP_SIZE) {
+ // Need aligned
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i);
+ let c = _mm256_load_si256(ptr.add(VECTOR_SIZE * 2) as *const __m256i);
+ let d = _mm256_load_si256(ptr.add(VECTOR_SIZE * 3) as *const __m256i);
+ let cmp_a = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below));
+ let cmp_b = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(b, v_flag_below));
+ let cmp_c = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(c, v_flag_below));
+ let cmp_d = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(d, v_flag_below));
+ let or1 = _mm256_or_si256(cmp_a, cmp_b);
+ let or2 = _mm256_or_si256(cmp_c, cmp_d);
+
+ // Fold the four 32-bit masks into two 64-bit ones: (a, b) and then (c, d).
+ if _mm256_movemask_epi8(_mm256_or_si256(or1, or2)) != 0 {
+ // The low half goes through `u32` so that a set bit 31 is not sign-extended
+ // into the bits that belong to the high half.
+ let mut mask = (_mm256_movemask_epi8(cmp_a) as u32 as i64)
+ | (_mm256_movemask_epi8(cmp_b) as i64) << VECTOR_SIZE;
+
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ let ptr = ptr.add(VECTOR_SIZE + VECTOR_SIZE);
+
+ // `ptr` now points at vector `c`, so the mask has to be built from `c` and `d`.
+ mask = (_mm256_movemask_epi8(cmp_c) as u32 as i64)
+ | (_mm256_movemask_epi8(cmp_d) as i64) << VECTOR_SIZE;
+
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ }
+
+ ptr = ptr.add(LOOP_SIZE);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
+ }
+ }
+
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ // Need aligned
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below));
+ let mut mask = _mm256_movemask_epi8(cmp);
+
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
+ }
+
+ debug_assert!(end_ptr.sub(VECTOR_SIZE) < ptr);
+
+ if ptr < end_ptr {
+ // Need aligned
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ // NOTE(review): fewer than VECTOR_SIZE bytes remain, yet a full vector is loaded;
+ // only the first `end` mask bits are used, but the load reads past the slice.
+ let a = _mm256_load_si256(ptr as *const __m256i);
+ let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below));
+ let mut mask = _mm256_movemask_epi8(cmp);
+ let end = sub(end_ptr, ptr);
+
+ write_forward!(mask, end);
+ }
+ }
+
+ // Write since start to the end of the slice
+ debug_assert!(start <= len);
+ if start < len {
+ fmt.write_str(str::from_utf8_unchecked(&bytes[start..len]))?;
+ }
+
+ Ok(())
}
-impl<'a> ::std::fmt::Display for Escaped<'a> {
- fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
- let mut start = 0;
- for (i, b) in self.bytes.iter().enumerate() {
- if b.wrapping_sub(b'"') <= FLAG {
- match *b {
- b'<' => escaping_body!(start, i, fmt, self, "<"),
- b'>' => escaping_body!(start, i, fmt, self, ">"),
- b'&' => escaping_body!(start, i, fmt, self, "&"),
- b'"' => escaping_body!(start, i, fmt, self, """),
- b'\'' => escaping_body!(start, i, fmt, self, "'"),
- b'/' => escaping_body!(start, i, fmt, self, "/"),
- _ => (),
+#[cfg(all(
+ target_arch = "x86_64",
+ not(target_os = "windows"),
+ askama_runtime_simd,
+ askama_runtime_sse
+))]
+#[target_feature(enable = "sse4.2")]
+unsafe fn _sse_escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result {
+ const VECTOR_SIZE: usize = size_of::<__m128i>();
+ const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+ const LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+ const NEEDLE_LEN: i32 = 6; // number of meaningful bytes at the front of `needle`
+
+ let needle = _mm_setr_epi8(
+ b'<' as i8, b'>' as i8, b'&' as i8, b'"' as i8,
+ b'\'' as i8, b'/' as i8, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ );
+
+ let len = bytes.len();
+ let start_ptr = bytes.as_ptr();
+ let mut ptr = start_ptr;
+ let mut start = 0; // index of the first byte not yet written to `fmt`
+
+ if len < VECTOR_SIZE {
+ let a = _mm_loadu_si128(ptr as *const __m128i);
+ let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, len as i32, 0);
+ let mut mask = _mm_extract_epi16(cmp, 0) as i16;
+
+ // The mask needs no extra bound here: the explicit length handed to the
+ // compare instruction is `len`
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ } else {
+ let end_ptr = bytes[len..].as_ptr();
+
+ {
+ let align = (VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)) & VECTOR_ALIGN;
+ if 0 < align {
+ let a = _mm_loadu_si128(ptr as *const __m128i);
+ let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, align as i32, 0);
+ let mut mask = _mm_extract_epi16(cmp, 0) as i16;
+
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ ptr = ptr.add(align);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
+ }
+ }
+
+ if LOOP_SIZE <= len {
+ // Main loop: 64 bytes per iteration, `ptr` must be aligned to VECTOR_SIZE
+ while ptr <= end_ptr.sub(LOOP_SIZE) {
+ // Aligned loads below require this
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c = _mm_load_si128(ptr.add(VECTOR_SIZE * 2) as *const __m128i);
+ let d = _mm_load_si128(ptr.add(VECTOR_SIZE * 3) as *const __m128i);
+ let cmp_a = _mm_cmpestrm(needle, NEEDLE_LEN, a, VECTOR_SIZE as i32, 0);
+ let cmp_b = _mm_cmpestrm(needle, NEEDLE_LEN, b, VECTOR_SIZE as i32, 0);
+ let cmp_c = _mm_cmpestrm(needle, NEEDLE_LEN, c, VECTOR_SIZE as i32, 0);
+ let cmp_d = _mm_cmpestrm(needle, NEEDLE_LEN, d, VECTOR_SIZE as i32, 0);
+ let or1 = _mm_or_si128(cmp_a, cmp_b);
+ let or2 = _mm_or_si128(cmp_c, cmp_d);
+
+ // Combine the four 16-bit masks into one 64-bit mask, `a` in the lowest bits.
+ if _mm_extract_epi16(_mm_or_si128(or1, or2), 0) != 0 {
+ let mut mask = _mm_extract_epi16(cmp_a, 0) as i64
+ | (_mm_extract_epi16(cmp_b, 0) as i64) << VECTOR_SIZE
+ | (_mm_extract_epi16(cmp_c, 0) as i64) << VECTOR_SIZE * 2
+ | (_mm_extract_epi16(cmp_d, 0) as i64) << VECTOR_SIZE * 3;
+
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
}
+
+ ptr = ptr.add(LOOP_SIZE);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
}
}
- fmt.write_str(unsafe { str::from_utf8_unchecked(&self.bytes[start..]) })?;
- Ok(())
+
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ // Aligned load below requires this
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, VECTOR_SIZE as i32, 0);
+ let mut mask = _mm_extract_epi16(cmp, 0) as i16;
+
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+
+ debug_assert!(start <= sub(ptr, start_ptr));
+ }
+
+ debug_assert!(end_ptr.sub(VECTOR_SIZE) < ptr);
+
+ if ptr < end_ptr {
+ // Aligned load below requires this
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let end = sub(end_ptr, ptr);
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, end as i32, 0);
+ let mut mask = _mm_extract_epi16(cmp, 0) as i16;
+
+ // The mask needs no extra bound here: the explicit length handed to the
+ // compare instruction is `end`
+ if mask != 0 {
+ write_mask!(mask, ptr, start_ptr, start, fmt, bytes);
+ }
+ }
+ }
+
+ // Write the remainder, from `start` to the end of the slice
+ debug_assert!(start <= len);
+ if start < len {
+ fmt.write_str(str::from_utf8_unchecked(&bytes[start..len]))?;
}
+
+ Ok(())
}
-const FLAG: u8 = b'>' - b'"';
+// All escaped bytes lie in `b'"'..=b'>'`; `b.wrapping_sub(FLAG_BELOW) <= LEN` tests for that range
+const LEN: u8 = b'>' - b'"';
+const FLAG_BELOW: u8 = b'"';
#[cfg(test)]
mod tests {
use super::*;
+
#[test]
fn test_escape() {
+ let escapes = "<>&\"'/";
+ let escaped = "<>&"'/";
+ let string_long: &str = &"foobar".repeat(1024);
+
assert_eq!(escape("").to_string(), "");
assert_eq!(escape("<&>").to_string(), "<&>");
- assert_eq!(escape("bla&").to_string(), "bla&");
+ assert_eq!(escape("bar&").to_string(), "bar&");
assert_eq!(escape(" is \"unsafe\" & should be 'escaped'").to_string(),
+ "// my <html> is "unsafe" & \
+ should be 'escaped'"
+ );
+ assert_eq!(escape(&"<".repeat(16)).to_string(), "<".repeat(16));
+ assert_eq!(escape(&"<".repeat(32)).to_string(), "<".repeat(32));
+ assert_eq!(escape(&"<".repeat(64)).to_string(), "<".repeat(64));
+ assert_eq!(escape(&"<".repeat(128)).to_string(), "<".repeat(128));
+ assert_eq!(escape(&"<".repeat(1024)).to_string(), "<".repeat(1024));
+ assert_eq!(escape(&"<".repeat(129)).to_string(), "<".repeat(129));
+ assert_eq!(
+ escape(&"<".repeat(128 * 2 - 1)).to_string(),
+ "<".repeat(128 * 2 - 1)
+ );
+ assert_eq!(
+ escape(&"<".repeat(128 * 8 - 1)).to_string(),
+ "<".repeat(128 * 8 - 1)
+ );
+ assert_eq!(escape(string_long).to_string(), string_long);
+ assert_eq!(
+ escape(&[string_long, "<"].join("")).to_string(),
+ [string_long, "<"].join("")
+ );
+ assert_eq!(
+ escape(&["<", string_long].join("")).to_string(),
+ ["<", string_long].join("")
+ );
+ assert_eq!(
+ escape(&escapes.repeat(1024)).to_string(),
+ escaped.repeat(1024)
+ );
+ assert_eq!(
+ escape(&[string_long, "<", string_long].join("")).to_string(),
+ [string_long, "<", string_long].join("")
+ );
+ assert_eq!(
+ escape(&[string_long, "<", string_long, escapes, string_long,].join("")).to_string(),
+ [string_long, "<", string_long, escaped, string_long,].join("")
+ );
}
}