diff --git a/src/lib.rs b/src/lib.rs index 3232852..846ecee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -953,20 +953,18 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { #[allow(missing_docs)] // WARNING: Exported for internal benchmarks, not fit for public consumption pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { + let start = bytes.pos(); + simd::match_uri_vectored(bytes); // URI must have at least one char - let uri_len = simd::match_uri_vectored(bytes.as_ref()); - if uri_len == 0 { + if bytes.pos() == start { return Err(Error::Token); } - // SAFETY: these bytes have just been matched here above. - unsafe { bytes.advance(uri_len) }; - let uri_slice = bytes.slice(); - let space_delim = next!(bytes); - if space_delim == b' ' { - // SAFETY: all bytes within `uri_slice` must have been `is_token` and therefore also utf-8. - let uri = unsafe { str::from_utf8_unchecked(uri_slice) }; - Ok(Status::Complete(uri)) + if next!(bytes) == b' ' { + return Ok(Status::Complete( + // SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8. + unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) }, + )); } else { Err(Error::Token) } @@ -1181,15 +1179,15 @@ fn parse_headers_iter_uninit<'a>( #[allow(clippy::never_loop)] // parse header name until colon let header_name: &str = 'name: loop { - let len = simd::match_header_name_vectored(bytes.as_ref()); - // SAFETY: these bytes have just been matched here above. - unsafe { bytes.advance(len) }; - let bslice = bytes.slice(); + simd::match_header_name_vectored(bytes); + let mut b = next!(bytes); + + // SAFETY: previously bumped by 1 with next! -> always safe. + let bslice = unsafe { bytes.slice_skip(1) }; // SAFETY: previous call to match_header_name_vectored ensured all bytes are valid // header name chars, and as such also valid utf-8. 
let name = unsafe { str::from_utf8_unchecked(bslice) }; - let mut b = next!(bytes); if b == b':' { break 'name name; } @@ -1215,7 +1213,6 @@ fn parse_headers_iter_uninit<'a>( // eat white space between colon and value 'whitespace_after_colon: loop { b = next!(bytes); - if b == b' ' || b == b'\t' { bytes.slice(); continue 'whitespace_after_colon; @@ -1242,9 +1239,7 @@ fn parse_headers_iter_uninit<'a>( 'value_lines: loop { // parse value till EOL - let len = simd::match_header_value_vectored(bytes.as_ref()); - // SAFETY: these bytes have just been matched here above. - unsafe { bytes.advance(len) }; + simd::match_header_value_vectored(bytes); let b = next!(bytes); //found_ctl diff --git a/src/simd/avx2.rs b/src/simd/avx2.rs index 3f8a88c..6a7edc1 100644 --- a/src/simd/avx2.rs +++ b/src/simd/avx2.rs @@ -1,21 +1,18 @@ +use crate::iter::Bytes; + #[inline] #[target_feature(enable = "avx2", enable = "sse4.2")] -pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 32 { - let advance = match_url_char_32_avx(remaining); - len = len.saturating_add(advance); - remaining = &bytes[len..]; +pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 32 { + let advance = match_url_char_32_avx(bytes.as_ref()); + bytes.advance(advance); if advance != 32 { - return len; + return; } } // do both, since avx2 only works when bytes.len() >= 32 - let advance = super::sse42::match_uri_vectored(remaining); - len = len.saturating_add(advance); - len + super::sse42::match_uri_vectored(bytes) } #[inline(always)] @@ -60,22 +57,17 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize { } #[target_feature(enable = "avx2", enable = "sse4.2")] -pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 32 { - let advance = match_header_value_char_32_avx(remaining); - len = 
len.saturating_add(advance); - remaining = &bytes[len..]; +pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 32 { + let advance = match_header_value_char_32_avx(bytes.as_ref()); + bytes.advance(advance); if advance != 32 { - return len; + return; } } // do both, since avx2 only works when bytes.len() >= 32 - let advance = super::sse42::match_header_value_vectored(remaining); - len = len.saturating_add(advance); - len + super::sse42::match_header_value_vectored(bytes) } #[inline(always)] @@ -146,7 +138,7 @@ fn avx2_code_matches_header_value_chars_table() { } #[cfg(test)] -unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool { +unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { let slice = [ b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_', @@ -157,9 +149,11 @@ unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool b'_', b'_', byte, b'_', b'_', b'_', b'_', b'_', ]; + let mut bytes = Bytes::new(&slice); + + f(&mut bytes); - let pos = f(&slice); - match pos { + match bytes.pos() { 32 => true, 26 => false, _ => unreachable!(), diff --git a/src/simd/mod.rs b/src/simd/mod.rs index a2da053..63464b4 100644 --- a/src/simd/mod.rs +++ b/src/simd/mod.rs @@ -11,7 +11,7 @@ mod swar; ) ), )))] -pub(crate) use self::swar::*; +pub use self::swar::*; #[cfg(all( httparse_simd, @@ -59,7 +59,7 @@ mod runtime; target_arch = "x86_64", ), ))] -pub(crate) use self::runtime::*; +pub use self::runtime::*; #[cfg(all( httparse_simd, @@ -72,18 +72,18 @@ pub(crate) use self::runtime::*; ))] mod sse42_compile_time { #[inline(always)] - pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize { - super::swar::match_header_name_vectored(b) + pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) { + super::swar::match_header_name_vectored(b); } #[inline(always)] - pub(crate) fn match_uri_vectored(b: &[u8]) -> usize { + pub fn match_uri_vectored(b: &mut 
crate::iter::Bytes<'_>) { // SAFETY: calls are guarded by a compile time feature check unsafe { crate::simd::sse42::match_uri_vectored(b) } } - + #[inline(always)] - pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize { + pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) { // SAFETY: calls are guarded by a compile time feature check unsafe { crate::simd::sse42::match_header_value_vectored(b) } } @@ -98,7 +98,7 @@ mod sse42_compile_time { target_arch = "x86_64", ), ))] -pub(crate) use self::sse42_compile_time::*; +pub use self::sse42_compile_time::*; #[cfg(all( httparse_simd, @@ -110,18 +110,18 @@ pub(crate) use self::sse42_compile_time::*; ))] mod avx2_compile_time { #[inline(always)] - pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize { - super::swar::match_header_name_vectored(b) + pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) { + super::swar::match_header_name_vectored(b); } #[inline(always)] - pub(crate) fn match_uri_vectored(b: &[u8]) -> usize { + pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) { // SAFETY: calls are guarded by a compile time feature check unsafe { crate::simd::avx2::match_uri_vectored(b) } } - + #[inline(always)] - pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize { + pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) { // SAFETY: calls are guarded by a compile time feature check unsafe { crate::simd::avx2::match_header_value_vectored(b) } } @@ -135,7 +135,7 @@ mod avx2_compile_time { target_arch = "x86_64", ), ))] -pub(crate) use self::avx2_compile_time::*; +pub use self::avx2_compile_time::*; #[cfg(all( httparse_simd, @@ -149,4 +149,4 @@ mod neon; target_arch = "aarch64", httparse_simd_neon_intrinsics, ))] -pub(crate) use self::neon::*; +pub use self::neon::*; diff --git a/src/simd/neon.rs b/src/simd/neon.rs index 1e85589..c6b86a8 100644 --- a/src/simd/neon.rs +++ b/src/simd/neon.rs @@ -1,60 +1,52 @@ +use crate::iter::Bytes; use 
core::arch::aarch64::*; #[inline] -pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining. - let advance = unsafe { match_header_name_char_16_neon(remaining) }; - len = len.saturating_add(advance); - remaining = &bytes[len..]; - - if advance != 16 { - return len; +pub fn match_header_name_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 16 { + // SAFETY: ensured that there are at least 16 bytes remaining + unsafe { + let advance = match_header_name_char_16_neon(bytes.as_ref().as_ptr()); + bytes.advance(advance); + + if advance != 16 { + return; + } } } - let advance = super::swar::match_header_name_vectored(remaining); - len = len.saturating_add(advance); - len + super::swar::match_header_name_vectored(bytes); } #[inline] -pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining. - let advance = unsafe { match_header_value_char_16_neon(remaining) }; - len = len.saturating_add(advance); - remaining = &bytes[len..]; - - if advance != 16 { - return len; +pub fn match_header_value_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 16 { + // SAFETY: ensured that there are at least 16 bytes remaining + unsafe { + let advance = match_header_value_char_16_neon(bytes.as_ref().as_ptr()); + bytes.advance(advance); + + if advance != 16 { + return; + } } } - let advance = super::swar::match_header_value_vectored(remaining); - len = len.saturating_add(advance); - len + super::swar::match_header_value_vectored(bytes); } #[inline] -pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining. 
- let advance = unsafe { match_url_char_16_neon(remaining) }; - len = len.saturating_add(advance); - remaining = &bytes[len..]; - - if advance != 16 { - return len; +pub fn match_uri_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 16 { + // SAFETY: ensured that there are at least 16 bytes remaining + unsafe { + let advance = match_url_char_16_neon(bytes.as_ref().as_ptr()); + bytes.advance(advance); + + if advance != 16 { + return; + } } } - let advance = super::swar::match_uri_vectored(remaining); - len = len.saturating_add(advance); - len + super::swar::match_uri_vectored(bytes); } const fn bit_set(x: u8) -> bool { @@ -89,7 +81,7 @@ const BITMAPS: ([u8; 16], [u8; 16]) = build_bitmap(); // NOTE: adapted from 256-bit version, with upper 128-bit ops commented out #[inline] -unsafe fn match_header_name_char_16_neon(bytes: &[u8]) -> usize { +unsafe fn match_header_name_char_16_neon(ptr: *const u8) -> usize { let bitmaps = BITMAPS; // NOTE: ideally compile-time constants let (bitmap_0_7, _bitmap_8_15) = bitmaps; @@ -102,7 +94,7 @@ unsafe fn match_header_name_char_16_neon(bytes: &[u8]) -> usize { let bitmask_lookup = vld1q_u8(BITMASK_LOOKUP_DATA.as_ptr()); // Load 16 input bytes. - let input = vld1q_u8(bytes.as_ptr()); + let input = vld1q_u8(ptr); // Extract indices for row_0_7. let indices_0_7 = vandq_u8(input, vdupq_n_u8(0x8F)); // 0b1000_1111; @@ -130,8 +122,8 @@ unsafe fn match_header_name_char_16_neon(bytes: &[u8]) -> usize { } #[inline] -unsafe fn match_url_char_16_neon(bytes: &[u8]) -> usize { - let input = vld1q_u8(bytes.as_ptr()); +unsafe fn match_url_char_16_neon(ptr: *const u8) -> usize { + let input = vld1q_u8(ptr); // Check that b'!' 
<= input <= b'~' let result = vandq_u8( @@ -149,8 +141,8 @@ unsafe fn match_url_char_16_neon(bytes: &[u8]) -> usize { } #[inline] -unsafe fn match_header_value_char_16_neon(bytes: &[u8]) -> usize { - let input = vld1q_u8(bytes.as_ptr()); +unsafe fn match_header_value_char_16_neon(ptr: *const u8) -> usize { + let input = vld1q_u8(ptr); // Check that b' ' <= and b != 127 or b == 9 let result = vcleq_u8(vdupq_n_u8(b' '), input); @@ -203,56 +195,67 @@ unsafe fn offsetnz(x: uint8x16_t) -> u32 { #[test] fn neon_code_matches_uri_chars_table() { - assert!(byte_is_allowed(b'_', match_uri_vectored)); - - for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_uri_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); + #[allow(clippy::undocumented_unsafe_blocks)] + unsafe { + assert!(byte_is_allowed(b'_', match_uri_vectored)); + + for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() { + assert_eq!( + byte_is_allowed(b as u8, match_uri_vectored), + allowed, + "byte_is_allowed({:?}) should be {:?}", + b, + allowed, + ); + } } } #[test] fn neon_code_matches_header_value_chars_table() { - assert!(byte_is_allowed(b'_', match_header_value_vectored)); - - for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_value_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); + #[allow(clippy::undocumented_unsafe_blocks)] + unsafe { + assert!(byte_is_allowed(b'_', match_header_value_vectored)); + + for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() { + assert_eq!( + byte_is_allowed(b as u8, match_header_value_vectored), + allowed, + "byte_is_allowed({:?}) should be {:?}", + b, + allowed, + ); + } } } #[test] fn neon_code_matches_header_name_chars_table() { - assert!(byte_is_allowed(b'_', match_header_name_vectored)); - - for (b, allowed) in 
crate::HEADER_NAME_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_name_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); + #[allow(clippy::undocumented_unsafe_blocks)] + unsafe { + assert!(byte_is_allowed(b'_', match_header_name_vectored)); + + for (b, allowed) in crate::HEADER_NAME_MAP.iter().cloned().enumerate() { + assert_eq!( + byte_is_allowed(b as u8, match_header_name_vectored), + allowed, + "byte_is_allowed({:?}) should be {:?}", + b, + allowed, + ); + } } } #[cfg(test)] -fn byte_is_allowed(byte: u8, f: fn(bytes: &[u8]) -> usize) -> bool { +unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { let mut slice = [b'_'; 16]; slice[10] = byte; + let mut bytes = Bytes::new(&slice); + + f(&mut bytes); - let pos = f(&slice); - match pos { + match bytes.pos() { 16 => true, 10 => false, x => panic!("unexpected pos: {}", x), diff --git a/src/simd/runtime.rs b/src/simd/runtime.rs index c51e317..c523a92 100644 --- a/src/simd/runtime.rs +++ b/src/simd/runtime.rs @@ -1,5 +1,5 @@ use std::sync::atomic::{AtomicU8, Ordering}; - +use crate::iter::Bytes; use super::avx2; use super::sse42; @@ -30,11 +30,11 @@ fn get_runtime_feature() -> u8 { feature } -pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize { - super::swar::match_header_name_vectored(bytes) +pub fn match_header_name_vectored(bytes: &mut Bytes) { + super::swar::match_header_name_vectored(bytes); } -pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize { +pub fn match_uri_vectored(bytes: &mut Bytes) { // SAFETY: calls are guarded by a feature check unsafe { match get_runtime_feature() { @@ -45,7 +45,7 @@ pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize { } } -pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize { +pub fn match_header_value_vectored(bytes: &mut Bytes) { // SAFETY: calls are guarded by a feature check unsafe { match get_runtime_feature() { diff --git 
a/src/simd/sse42.rs b/src/simd/sse42.rs index d835f19..d6fbf02 100644 --- a/src/simd/sse42.rs +++ b/src/simd/sse42.rs @@ -1,20 +1,16 @@ +use crate::iter::Bytes; + #[target_feature(enable = "sse4.2")] -pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining. - let advance = match_url_char_16_sse(remaining); - len = len.saturating_add(advance); - remaining = &bytes[len..]; +pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 16 { + let advance = match_url_char_16_sse(bytes.as_ref()); + bytes.advance(advance); if advance != 16 { - return len; + return; } } - let advance = super::swar::match_uri_vectored(remaining); - len = len.saturating_add(advance); - len + super::swar::match_uri_vectored(bytes); } #[inline(always)] @@ -66,22 +62,16 @@ unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize { } #[target_feature(enable = "sse4.2")] -pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining. 
- let advance = match_header_value_char_16_sse(remaining); - len = len.saturating_add(advance); - remaining = &bytes[len..]; - - if advance != 16 { - return len; - } +pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { + while bytes.as_ref().len() >= 16 { + let advance = match_header_value_char_16_sse(bytes.as_ref()); + bytes.advance(advance); + + if advance != 16 { + return; + } } - let advance = super::swar::match_header_value_vectored(remaining); - len = len.saturating_add(advance); - len + super::swar::match_header_value_vectored(bytes); } #[inline(always)] @@ -153,16 +143,18 @@ fn sse_code_matches_header_value_chars_table() { #[allow(clippy::missing_safety_doc)] #[cfg(test)] -unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool { - let slice = &[ +unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { + let slice = [ b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_', byte, b'_', b'_', b'_', b'_', b'_', ]; + let mut bytes = Bytes::new(&slice); + + f(&mut bytes); - let pos = f(slice); - match pos { + match bytes.pos() { 16 => true, 10 => false, _ => unreachable!(), diff --git a/src/simd/swar.rs b/src/simd/swar.rs index 8cef3ce..857fc58 100644 --- a/src/simd/swar.rs +++ b/src/simd/swar.rs @@ -1,73 +1,83 @@ /// SWAR: SIMD Within A Register /// SIMD validator backend that validates register-sized chunks of data at a time. 
-use crate::{is_header_name_token, is_header_value_token, is_uri_token}; -use core::convert::TryInto; +use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes}; // Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8 const BLOCK_SIZE: usize = core::mem::size_of::<usize>(); type ByteBlock = [u8; BLOCK_SIZE]; #[inline] -pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= BLOCK_SIZE { - let block = &remaining[..BLOCK_SIZE]; - let advance = match_uri_char_8_swar(block.try_into().unwrap()); - len = len.saturating_add(advance); - remaining = &bytes[len..]; - if advance != BLOCK_SIZE { - // NOTE: must continue to tail-matching logic below, due to known - // false-negatives that need to be individually checked. - break; +pub fn match_uri_vectored(bytes: &mut Bytes) { + loop { + if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { + let n = match_uri_char_8_swar(bytes8); + // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes + // in `bytes`, so calling `advance(n)` is safe. + unsafe { + bytes.advance(n); + } + if n == BLOCK_SIZE { + continue; + } } + if let Some(b) = bytes.peek() { + if is_uri_token(b) { + // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte + // in bytes, so calling advance is safe. 
+ unsafe { + bytes.advance(1); + } + continue; + } + } + break; } - let tail_len = match_tail(is_uri_token, remaining); - len = len.saturating_add(tail_len); - len } #[inline] -pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= BLOCK_SIZE { - let block = &remaining[..BLOCK_SIZE]; - let advance = match_header_value_char_8_swar(block.try_into().unwrap()); - len = len.saturating_add(advance); - remaining = &bytes[len..]; - if advance != BLOCK_SIZE { - // NOTE: must continue to tail-matching logic below, due to known - // false-negatives that need to be individually checked. - break; +pub fn match_header_value_vectored(bytes: &mut Bytes) { + loop { + if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { + let n = match_header_value_char_8_swar(bytes8); + // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes + // in `bytes`, so calling `advance(n)` is safe. + unsafe { + bytes.advance(n); + } + if n == BLOCK_SIZE { + continue; + } + } + if let Some(b) = bytes.peek() { + if is_header_value_token(b) { + // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte + // in bytes, so calling advance is safe. 
+ unsafe { + bytes.advance(1); + } + continue; + } } + break; } - let tail_len = match_tail(is_header_value_token, remaining); - len = len.saturating_add(tail_len); - len } #[inline] -pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize { - let mut len = 0usize; - let mut remaining = bytes; - while remaining.len() >= BLOCK_SIZE { - let block = &remaining[..BLOCK_SIZE]; - let advance = block.iter().position(|b| !is_header_name_token(*b)); - match advance { - None => { - len = len.saturating_add(BLOCK_SIZE); - remaining = &bytes[len..]; - } - Some(v) => { - len = len.saturating_add(v); - return len; - } - }; +pub fn match_header_name_vectored(bytes: &mut Bytes) { + while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { + let n = match_block(is_header_name_token, block); + // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes + // in `bytes`, so calling `advance(n)` is safe. + unsafe { + bytes.advance(n); + } + if n != BLOCK_SIZE { + return; + } } - let tail_len = match_tail(is_header_name_token, remaining); - len = len.saturating_add(tail_len); - len + // SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the + // end, but no further. + unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) }; } // Matches "tail", i.e: when we have bool, bytes: &[u8]) -> usize { bytes.len() } +// Naive fallback block matcher +#[inline(always)] +fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize { + for (i, &b) in block.iter().enumerate() { + if !f(b) { + return i; + } + } + BLOCK_SIZE +} + // A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44) // creates a u64 whose bytes are each equal to b const fn uniform_block(b: u8) -> usize {