diff --git a/askama_escape/Cargo.toml b/askama_escape/Cargo.toml index ee84891fa..859b903b5 100644 --- a/askama_escape/Cargo.toml +++ b/askama_escape/Cargo.toml @@ -15,6 +15,12 @@ appveyor = { repository = "djc/askama" } maintenance = { status = "actively-developed" } travis-ci = { repository = "djc/askama" } +[dependencies] +cfg-if ="0.1.6" + +[build-dependencies] +version_check = "0.1.4" + [dev-dependencies] criterion = "0.2" diff --git a/askama_escape/benches/all.rs b/askama_escape/benches/all.rs index e7dc7ed90..3dec4f4e7 100644 --- a/askama_escape/benches/all.rs +++ b/askama_escape/benches/all.rs @@ -2,77 +2,77 @@ extern crate askama_escape; #[macro_use] extern crate criterion; -use askama_escape::MarkupDisplay; +use askama_escape::escape; use criterion::Criterion; criterion_main!(benches); criterion_group!(benches, functions); fn functions(c: &mut Criterion) { - c.bench_function("Escaping", escaping); + c.bench_function("toString 1 bytes", format_short); + c.bench_function("No Escaping 1 bytes", no_escaping_short); + c.bench_function("Escaping 1 bytes", escaping_short); + c.bench_function("toString 10 bytes", format); + c.bench_function("No Escaping 10 bytes", no_escaping); + c.bench_function("Escaping 10 bytes", escaping); + c.bench_function("toString 5 MB", format_long); + c.bench_function("No Escaping 5 MB", no_escaping_long); + c.bench_function("Escaping 5 MB", escaping_long); } -fn escaping(b: &mut criterion::Bencher) { - let string_long = r#" - Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris consequat tellus sit - amet ornare fermentum. Etiam nec erat ante. In at metus a orci mollis scelerisque. - Sed eget ultrices turpis, at sollicitudin erat. Integer hendrerit nec magna quis - venenatis. Vivamus non dolor hendrerit, vulputate velit sed, varius nunc. Quisque - in pharetra mi. Sed ullamcorper nibh malesuada commodo porttitor. Ut scelerisque - sodales felis quis dignissim. Morbi aliquam finibus justo, sit amet consectetur - mauris efficitur sit amet. Donec posuere turpis felis, eu lacinia magna accumsan - quis. Fusce egestas lacus vel fermentum tincidunt. Phasellus a nulla eget lectus - placerat commodo at eget nisl. Fusce cursus dui quis purus accumsan auctor. - Donec iaculis felis quis metus consectetur porttitor. -

- Etiam nibh mi, accumsan quis purus sed, posuere fermentum lorem. In pulvinar porta - maximus. Fusce tincidunt lacinia tellus sit amet tincidunt. Aliquam lacus est, pulvinar - non metus a, facilisis ultrices quam. Nulla feugiat leo in cursus eleifend. Suspendisse - eget nisi ac justo sagittis interdum id a ipsum. Nulla mauris justo, scelerisque ac - rutrum vitae, consequat vel ex. -

-

- Sed sollicitudin sem mauris, at rutrum nibh egestas vel. Ut eu nisi tellus. Praesent dignissim - orci elementum, mattis turpis eget, maximus ante. Suspendisse luctus eu felis a tempor. Morbi - ac risus vitae sem molestie ullamcorper. Curabitur ligula augue, sollicitudin quis maximus vel, - facilisis sed nibh. Aenean auctor magna sem, id rutrum metus convallis quis. Nullam non arcu - dictum, lobortis erat quis, rhoncus est. Suspendisse venenatis, mi sed venenatis vehicula, - tortor dolor egestas lectus, et efficitur turpis odio non augue. Integer velit sapien, dictum - non egestas vitae, hendrerit sed quam. Phasellus a nunc eu erat varius imperdiet. Etiam id - sollicitudin turpis, vitae molestie orci. Quisque ornare magna quis metus rhoncus commodo. - Phasellus non mauris velit. -

-

- Etiam dictum tellus ipsum, nec varius quam ornare vel. Cras vehicula diam nec sollicitudin - ultricies. Pellentesque rhoncus sagittis nisl id facilisis. Nunc viverra convallis risus ut - luctus. Aliquam vestibulum efficitur massa, id tempus nisi posuere a. Aliquam scelerisque - elit justo. Nullam a ante felis. Cras vitae lorem eu nisi feugiat hendrerit. Maecenas vitae - suscipit leo, lacinia dignissim lacus. Sed eget volutpat mi. In eu bibendum neque. Pellentesque - finibus velit a fermentum rhoncus. Maecenas leo purus, eleifend eu lacus a, condimentum sagittis - justo. -

"#; - let string_short = "Lorem ipsum dolor sit amet,bar&foo\"bar\\foo/bar"; - let empty = ""; - let no_escape = "Lorem ipsum dolor sit amet,"; - let no_escape_long = r#" -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin scelerisque eu urna in aliquet. -Phasellus ac nulla a urna sagittis consequat id quis est. Nullam eu ex eget erat accumsan dictum -ac lobortis urna. Etiam fermentum ut quam at dignissim. Curabitur vestibulum luctus tellus, sit -amet lobortis augue tempor faucibus. Nullam sed felis eget odio elementum euismod in sit amet massa. -Vestibulum sagittis purus sit amet eros auctor, sit amet pharetra purus dapibus. Donec ornare metus -vel dictum porta. Etiam ut nisl nisi. Nullam rutrum porttitor mi. Donec aliquam ac ipsum eget -hendrerit. Cras faucibus, eros ut pharetra imperdiet, est tellus aliquet felis, eget convallis -lacus ipsum eget quam. Vivamus orci lorem, maximus ac mi eget, bibendum vulputate massa. In -vestibulum dui hendrerit, vestibulum lacus sit amet, posuere erat. Vivamus euismod massa diam, -vulputate euismod lectus vestibulum nec. Donec sit amet massa magna. Nunc ipsum nulla, euismod -quis lacus at, gravida maximus elit. Duis tristique, nisl nullam. - "#; +static A: &str = "a"; +static E: &str = "<"; + +fn escaping_short(b: &mut criterion::Bencher) { + b.iter(|| escape(E).to_string()); +} +fn no_escaping_short(b: &mut criterion::Bencher) { b.iter(|| { - format!("{}", MarkupDisplay::from(string_long)); - format!("{}", MarkupDisplay::from(string_short)); - format!("{}", MarkupDisplay::from(empty)); - format!("{}", MarkupDisplay::from(no_escape)); - format!("{}", MarkupDisplay::from(no_escape_long)); + escape(A).to_string(); }); } + +fn format_short(b: &mut criterion::Bencher) { + b.iter(|| A.to_string()); +} + +fn escaping(b: &mut criterion::Bencher) { + // 10 bytes at 10% escape + let string: &str = &[A, A, A, A, A, E, A, A, A, A, A].join(""); + + b.iter(|| escape(string).to_string()); +} + +fn no_escaping(b: &mut criterion::Bencher) { + let no_escape: &str = &A.repeat(10); + + b.iter(|| escape(no_escape).to_string()); +} + +fn format(b: &mut criterion::Bencher) { + let string: &str = &A.repeat(10); + + b.iter(|| string.to_string()); +} + +fn escaping_long(b: &mut criterion::Bencher) { + // 5 MB at 3.125% escape + let string: &str = &[&A.repeat(15), E, &A.repeat(16)] + .join("") + .repeat(160 * 1024); + + b.iter(|| escape(string).to_string()); +} + +fn no_escaping_long(b: &mut criterion::Bencher) { + let no_escape: &str = &A.repeat(5 * 1024 * 1024); + + b.iter(|| escape(no_escape).to_string()); +} + +fn format_long(b: &mut criterion::Bencher) { + let string: &str = &A.repeat(5 * 1024 * 1024); + + b.iter(|| string.to_string()); +} diff --git a/askama_escape/build.rs b/askama_escape/build.rs new file mode 100644 index 000000000..f1dae3a6d --- /dev/null +++ b/askama_escape/build.rs @@ -0,0 +1,29 @@ +extern crate version_check; + +use std::env; + +use version_check::is_min_version; + +fn main() { + enable_simd_optimizations(); +} + +fn enable_simd_optimizations() { + if is_env_set("CARGO_CFG_ASKAMA_DISABLE_AUTO_SIMD") { + return; + } + if !is_min_version("1.27.0") + .map(|(yes, _)| yes) + .unwrap_or(false) + { + return; + } + + println!("cargo:rustc-cfg=askama_runtime_simd"); + println!("cargo:rustc-cfg=askama_runtime_avx"); + println!("cargo:rustc-cfg=askama_runtime_sse"); +} + +fn is_env_set(name: &str) -> bool { + env::var(name).is_ok() +} diff --git a/askama_escape/src/lib.rs b/askama_escape/src/lib.rs index b967f1f02..075097beb 100644 --- a/askama_escape/src/lib.rs +++ b/askama_escape/src/lib.rs @@ -1,3 +1,6 @@ +#[macro_use] +extern crate cfg_if; + use std::fmt::{self, Display, Formatter}; use std::str; @@ -49,52 +52,486 @@ pub fn escape(s: &str) -> Escaped { } } -macro_rules! escaping_body { - ($start:ident, $i:ident, $fmt:ident, $_self:ident, $quote:expr) => {{ +pub struct Escaped<'a> { + bytes: &'a [u8], +} + +impl<'a> Display for Escaped<'a> { + fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { + _imp(self.bytes, fmt) + } +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", not(target_os = "windows"), askama_runtime_simd))] { + + use std::arch::x86_64::*; + use std::mem::{self, size_of}; + use std::sync::atomic::{AtomicUsize, Ordering}; + + #[inline(always)] + fn _imp(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + // https://github.com/BurntSushi/rust-memchr/blob/master/src/x86/mod.rs#L9-L29 + static mut FN: fn(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result = detect; + + fn detect(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + let fun = if cfg!(askama_runtime_avx) && is_x86_feature_detected!("avx2") { + _avx_escape as usize + } else if cfg!(askama_runtime_sse) && is_x86_feature_detected!("sse4.2") { + _sse_escape as usize + } else { + _escape as usize + }; + + let slot = unsafe { &*(&FN as *const _ as *const AtomicUsize) }; + slot.store(fun as usize, Ordering::Relaxed); + unsafe { + mem::transmute:: fmt::Result>(fun)(bytes, fmt) + } + } + + unsafe { + let slot = &*(&FN as *const _ as * const AtomicUsize); + let fun = slot.load(Ordering::Relaxed); + mem::transmute:: fmt::Result>(fun)(bytes, fmt) + } + } + + // Subtract `b` from `a` and return the difference. `a` should be greater than + // or equal to `b`. + #[inline(always)] + fn sub(a: *const u8, b: *const u8) -> usize { + debug_assert!(b <= a); + (a as usize) - (b as usize) + } + } else { + + #[inline(always)] + fn _imp(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + _escape(bytes, fmt) + } + } +} + +// Main instruction in escape +// Test 'start' iterator in current position, +// write slice since start to current position - 1, +// escape byte with quote and +// set 'start' iterator with next position +macro_rules! escape_body { + ($i:expr, $start:ident, $fmt:ident, $bytes:ident, $quote:expr) => {{ if $start < $i { - $fmt.write_str(unsafe { str::from_utf8_unchecked(&$_self.bytes[$start..$i]) })?; + #[allow(unused_unsafe)] + $fmt.write_str(unsafe { str::from_utf8_unchecked(&$bytes[$start..$i]) })?; } $fmt.write_str($quote)?; $start = $i + 1; }}; } -pub struct Escaped<'a> { - bytes: &'a [u8], +// Select between pairs bytes - quote to call the callback +macro_rules! bodies { + ($i:expr, $b:expr, $start:ident, $fmt:ident, $bytes:ident, $callback:ident) => { + match $b { + b'<' => $callback!($i, $start, $fmt, $bytes, "<"), + b'>' => $callback!($i, $start, $fmt, $bytes, ">"), + b'&' => $callback!($i, $start, $fmt, $bytes, "&"), + b'"' => $callback!($i, $start, $fmt, $bytes, """), + b'\'' => $callback!($i, $start, $fmt, $bytes, "'"), + b'/' => $callback!($i, $start, $fmt, $bytes, "/"), + _ => (), + } + }; +} + +// Wrap the body of the escape over the body of the mask +// Resolve expression and do the escape body +#[allow(unused_macros)] +macro_rules! mask_body { + ($i:expr, $start:ident, $fmt:ident, $bytes:ident, $quote:expr) => {{ + let i = $i; + escape_body!(i, $start, $fmt, $bytes, $quote); + }}; +} + +// Format bytes in the mask that starts in the current pointer +#[allow(unused_macros)] +macro_rules! mask_bodies { + ($mask:ident, $at:ident, $cur:ident, $ptr:ident, $start:ident, $fmt:ident, $bytes:ident) => { + bodies!($at + $cur, *$ptr.add($cur), $start, $fmt, $bytes, mask_body); + + $mask ^= 1 << $cur; + if $mask == 0 { + break; + } + + $cur = $mask.trailing_zeros() as usize; + }; +} + +// Write a mask +// Select starts mask byte pointer and current position to deep find. +// The main loop break when mask == 0 +#[allow(unused_macros)] +macro_rules! write_mask { + ($mask:ident, $ptr:ident, $start_ptr:ident, $start:ident, $fmt:ident, $bytes:ident) => {{ + let at = sub($ptr, $start_ptr); + let mut cur = $mask.trailing_zeros() as usize; + + loop { + mask_bodies!($mask, at, cur, $ptr, $start, $fmt, $bytes); + } + + debug_assert_eq!(at, sub($ptr, $start_ptr)) + }}; +} + +/// Scalar html escape +fn _escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + let mut start = 0; + + for (i, b) in bytes.iter().enumerate() { + if b.wrapping_sub(FLAG_BELOW) <= LEN { + bodies!(i, *b, start, fmt, bytes, escape_body); + } + } + + fmt.write_str(unsafe { str::from_utf8_unchecked(&bytes[start..]) })?; + + Ok(()) +} + +#[cfg(all( + target_arch = "x86_64", + not(target_os = "windows"), + askama_runtime_simd, + askama_runtime_avx +))] +#[target_feature(enable = "avx2")] +unsafe fn _avx_escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + const VECTOR_SIZE: usize = size_of::<__m256i>(); + const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; + const LOOP_SIZE: usize = 4 * VECTOR_SIZE; + + let v_flag = _mm256_set1_epi8((LEN + 1) as i8); + let v_flag_below = _mm256_set1_epi8(FLAG_BELOW as i8); + + let len = bytes.len(); + let start_ptr = bytes.as_ptr(); + let mut ptr = start_ptr; + let mut start = 0; + + // Write a sliced mask + macro_rules! write_forward { + ($mask: ident, $align:ident) => {{ + if $mask != 0 { + let at = sub(ptr, start_ptr); + let mut cur = $mask.trailing_zeros() as usize; + + while cur < $align { + mask_bodies!($mask, at, cur, ptr, start, fmt, bytes); + } + + debug_assert_eq!(at, sub(ptr, start_ptr)) + } + }}; + } + + if len < VECTOR_SIZE { + let a = _mm256_loadu_si256(ptr as *const __m256i); + let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below)); + let mut mask = _mm256_movemask_epi8(cmp); + + write_forward!(mask, len); + } else { + let end_ptr = bytes[len..].as_ptr(); + + { + let align = (VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)) & VECTOR_ALIGN; + if 0 < align { + let a = _mm256_loadu_si256(ptr as *const __m256i); + let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below)); + let mut mask = _mm256_movemask_epi8(cmp); + + write_forward!(mask, align); + ptr = ptr.add(align); + + debug_assert!(start <= sub(ptr, start_ptr)); + } + } + + debug_assert!(start_ptr <= ptr && start_ptr <= end_ptr.sub(VECTOR_SIZE)); + + if LOOP_SIZE <= len { + // Main loop 128 bytes, need aligned ptr at VECTOR_SIZE + while ptr <= end_ptr.sub(LOOP_SIZE) { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm256_load_si256(ptr as *const __m256i); + let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); + let c = _mm256_load_si256(ptr.add(VECTOR_SIZE * 2) as *const __m256i); + let d = _mm256_load_si256(ptr.add(VECTOR_SIZE * 3) as *const __m256i); + let cmp_a = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below)); + let cmp_b = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(b, v_flag_below)); + let cmp_c = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(c, v_flag_below)); + let cmp_d = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(d, v_flag_below)); + let or1 = _mm256_or_si256(cmp_a, cmp_b); + let or2 = _mm256_or_si256(cmp_c, cmp_d); + + // Adjust the four masks in two from right to left. + if _mm256_movemask_epi8(_mm256_or_si256(or1, or2)) != 0 { + let mut mask = _mm256_movemask_epi8(cmp_a) as i64 + | (_mm256_movemask_epi8(cmp_b) as i64) << VECTOR_SIZE; + + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + let ptr = ptr.add(VECTOR_SIZE + VECTOR_SIZE); + + mask = _mm256_movemask_epi8(cmp_b) as i64 + | (_mm256_movemask_epi8(cmp_c) as i64) << VECTOR_SIZE; + + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + } + + ptr = ptr.add(LOOP_SIZE); + + debug_assert!(start <= sub(ptr, start_ptr)); + } + } + + while ptr <= end_ptr.sub(VECTOR_SIZE) { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm256_load_si256(ptr as *const __m256i); + let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below)); + let mut mask = _mm256_movemask_epi8(cmp); + + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + ptr = ptr.add(VECTOR_SIZE); + + debug_assert!(start <= sub(ptr, start_ptr)); + } + + debug_assert!(end_ptr.sub(VECTOR_SIZE) < ptr); + + if ptr < end_ptr { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm256_load_si256(ptr as *const __m256i); + let cmp = _mm256_cmpgt_epi8(v_flag, _mm256_sub_epi8(a, v_flag_below)); + let mut mask = _mm256_movemask_epi8(cmp); + let end = sub(end_ptr, ptr); + + write_forward!(mask, end); + } + } + + // Write since start to the end of the slice + debug_assert!(start <= len); + if start < len { + fmt.write_str(str::from_utf8_unchecked(&bytes[start..len]))?; + } + + Ok(()) } -impl<'a> ::std::fmt::Display for Escaped<'a> { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - let mut start = 0; - for (i, b) in self.bytes.iter().enumerate() { - if b.wrapping_sub(b'"') <= FLAG { - match *b { - b'<' => escaping_body!(start, i, fmt, self, "<"), - b'>' => escaping_body!(start, i, fmt, self, ">"), - b'&' => escaping_body!(start, i, fmt, self, "&"), - b'"' => escaping_body!(start, i, fmt, self, """), - b'\'' => escaping_body!(start, i, fmt, self, "'"), - b'/' => escaping_body!(start, i, fmt, self, "/"), - _ => (), +#[cfg(all( + target_arch = "x86_64", + not(target_os = "windows"), + askama_runtime_simd, + askama_runtime_sse +))] +#[target_feature(enable = "sse4.2")] +unsafe fn _sse_escape(bytes: &[u8], fmt: &mut Formatter) -> fmt::Result { + const VECTOR_SIZE: usize = size_of::<__m128i>(); + const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; + const LOOP_SIZE: usize = 4 * VECTOR_SIZE; + const NEEDLE_LEN: i32 = 6; + + let needle = _mm_setr_epi8( + b'<' as i8, b'>' as i8, b'&' as i8, b'"' as i8, + b'\'' as i8, b'/' as i8, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + + let len = bytes.len(); + let start_ptr = bytes.as_ptr(); + let mut ptr = start_ptr; + let mut start = 0; + + if len < VECTOR_SIZE { + let a = _mm_loadu_si128(ptr as *const __m128i); + let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, len as i32, 0); + let mut mask = _mm_extract_epi16(cmp, 0) as i16; + + // No need write forward because I specified string size in + // compare instruction + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + } else { + let end_ptr = bytes[len..].as_ptr(); + + { + let align = (VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)) & VECTOR_ALIGN; + if 0 < align { + let a = _mm_loadu_si128(ptr as *const __m128i); + let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, align as i32, 0); + let mut mask = _mm_extract_epi16(cmp, 0) as i16; + + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + ptr = ptr.add(align); + + debug_assert!(start <= sub(ptr, start_ptr)); + } + } + + if LOOP_SIZE <= len { + // Main loop 64 bytes, need aligned ptr at VECTOR_SIZE + while ptr <= end_ptr.sub(LOOP_SIZE) { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm_load_si128(ptr as *const __m128i); + let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); + let c = _mm_load_si128(ptr.add(VECTOR_SIZE * 2) as *const __m128i); + let d = _mm_load_si128(ptr.add(VECTOR_SIZE * 3) as *const __m128i); + let cmp_a = _mm_cmpestrm(needle, NEEDLE_LEN, a, VECTOR_SIZE as i32, 0); + let cmp_b = _mm_cmpestrm(needle, NEEDLE_LEN, b, VECTOR_SIZE as i32, 0); + let cmp_c = _mm_cmpestrm(needle, NEEDLE_LEN, c, VECTOR_SIZE as i32, 0); + let cmp_d = _mm_cmpestrm(needle, NEEDLE_LEN, d, VECTOR_SIZE as i32, 0); + let or1 = _mm_or_si128(cmp_a, cmp_b); + let or2 = _mm_or_si128(cmp_c, cmp_d); + + // Adjust the four masks in one from right to left. + if _mm_extract_epi16(_mm_or_si128(or1, or2), 0) != 0 { + let mut mask = _mm_extract_epi16(cmp_a, 0) as i64 + | (_mm_extract_epi16(cmp_b, 0) as i64) << VECTOR_SIZE + | (_mm_extract_epi16(cmp_c, 0) as i64) << VECTOR_SIZE * 2 + | (_mm_extract_epi16(cmp_d, 0) as i64) << VECTOR_SIZE * 3; + + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); } + + ptr = ptr.add(LOOP_SIZE); + + debug_assert!(start <= sub(ptr, start_ptr)); } } - fmt.write_str(unsafe { str::from_utf8_unchecked(&self.bytes[start..]) })?; - Ok(()) + + while ptr <= end_ptr.sub(VECTOR_SIZE) { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm_load_si128(ptr as *const __m128i); + let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, VECTOR_SIZE as i32, 0); + let mut mask = _mm_extract_epi16(cmp, 0) as i16; + + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + ptr = ptr.add(VECTOR_SIZE); + + debug_assert!(start <= sub(ptr, start_ptr)); + } + + debug_assert!(end_ptr.sub(VECTOR_SIZE) < ptr); + + if ptr < end_ptr { + // Need aligned + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let end = sub(end_ptr, ptr); + let a = _mm_load_si128(ptr as *const __m128i); + let cmp = _mm_cmpestrm(needle, NEEDLE_LEN, a, end as i32, 0); + let mut mask = _mm_extract_epi16(cmp, 0) as i16; + + // No need write forward because I specified string size in + // compare instruction + if mask != 0 { + write_mask!(mask, ptr, start_ptr, start, fmt, bytes); + } + } + } + + // Write since start to the end of the slice + debug_assert!(start <= len); + if start < len { + fmt.write_str(str::from_utf8_unchecked(&bytes[start..len]))?; } + + Ok(()) } -const FLAG: u8 = b'>' - b'"'; +// Defining character interval from ASCII table to create bit masks from slice to be escaped +const LEN: u8 = b'>' - b'"'; +const FLAG_BELOW: u8 = b'"'; #[cfg(test)] mod tests { use super::*; + #[test] fn test_escape() { + let escapes = "<>&\"'/"; + let escaped = "<>&"'/"; + let string_long: &str = &"foobar".repeat(1024); + assert_eq!(escape("").to_string(), ""); assert_eq!(escape("<&>").to_string(), "<&>"); - assert_eq!(escape("bla&").to_string(), "bla&"); + assert_eq!(escape("bar&").to_string(), "bar&"); assert_eq!(escape(" is \"unsafe\" & should be 'escaped'").to_string(), + "// my <html> is "unsafe" & \ + should be 'escaped'" + ); + assert_eq!(escape(&"<".repeat(16)).to_string(), "<".repeat(16)); + assert_eq!(escape(&"<".repeat(32)).to_string(), "<".repeat(32)); + assert_eq!(escape(&"<".repeat(64)).to_string(), "<".repeat(64)); + assert_eq!(escape(&"<".repeat(128)).to_string(), "<".repeat(128)); + assert_eq!(escape(&"<".repeat(1024)).to_string(), "<".repeat(1024)); + assert_eq!(escape(&"<".repeat(129)).to_string(), "<".repeat(129)); + assert_eq!( + escape(&"<".repeat(128 * 2 - 1)).to_string(), + "<".repeat(128 * 2 - 1) + ); + assert_eq!( + escape(&"<".repeat(128 * 8 - 1)).to_string(), + "<".repeat(128 * 8 - 1) + ); + assert_eq!(escape(string_long).to_string(), string_long); + assert_eq!( + escape(&[string_long, "<"].join("")).to_string(), + [string_long, "<"].join("") + ); + assert_eq!( + escape(&["<", string_long].join("")).to_string(), + ["<", string_long].join("") + ); + assert_eq!( + escape(&escapes.repeat(1024)).to_string(), + escaped.repeat(1024) + ); + assert_eq!( + escape(&[string_long, "<", string_long].join("")).to_string(), + [string_long, "<", string_long].join("") + ); + assert_eq!( + escape(&[string_long, "<", string_long, escapes, string_long,].join("")).to_string(), + [string_long, "<", string_long, escaped, string_long,].join("") + ); } }