From e053fd6a2268a41b1a23df1878ce886519704b5c Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Tue, 22 Oct 2019 12:03:40 -0700 Subject: [PATCH] Enable creating from [u8] --- Cargo.toml | 4 ++ benches/construct.rs | 151 +++++++++++++++++++++++++++++++++++++++++++ benches/tinystr.rs | 48 -------------- src/helpers.rs | 6 +- src/tinystr16.rs | 55 ++++++++++------ src/tinystr4.rs | 37 ++++++++--- src/tinystr8.rs | 55 ++++++++++------ tests/main.rs | 39 +++++++++++ 8 files changed, 296 insertions(+), 99 deletions(-) create mode 100644 benches/construct.rs diff --git a/Cargo.toml b/Cargo.toml index a52a40f..c878543 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,10 @@ categories = ["data-structures"] [dev-dependencies] criterion = "0.3" +[[bench]] +name = "construct" +harness = false + [[bench]] name = "tinystr" harness = false diff --git a/benches/construct.rs b/benches/construct.rs new file mode 100644 index 0000000..854a1b0 --- /dev/null +++ b/benches/construct.rs @@ -0,0 +1,151 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Bencher; +use criterion::Criterion; +use criterion::Fun; + +use tinystr::{TinyStr16, TinyStr4, TinyStr8}; + +static STRINGS_4: &[&str] = &[ + "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK", +]; + +static STRINGS_8: &[&str] = &[ + "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419", + "und", "UK", +]; + +static STRINGS_16: &[&str] = &[ + "Latn", + "windows", + "AR", + "Hans", + "macos", + "AT", + "infiniband", + "FR", + "en", + "Cyrl", + "FromIntegral", + "NO", + "419", + "MacintoshOSX2019", + "UK", +]; + +macro_rules! 
bench_block { + ($c:expr, $name:expr, $action:ident) => { + let funcs = vec![ + Fun::new("String", $action!(String)), + Fun::new("TinyStr4", $action!(TinyStr4)), + Fun::new("TinyStr8", $action!(TinyStr8)), + Fun::new("TinyStr16", $action!(TinyStr16)), + ]; + + $c.bench_functions(&format!("{}/4", $name), funcs, STRINGS_4); + + let funcs = vec![ + Fun::new("String", $action!(String)), + Fun::new("TinyStr8", $action!(TinyStr8)), + Fun::new("TinyStr16", $action!(TinyStr16)), + ]; + + $c.bench_functions(&format!("{}/8", $name), funcs, STRINGS_8); + + let funcs = vec![ + Fun::new("String", $action!(String)), + Fun::new("TinyStr16", $action!(TinyStr16)), + ]; + + $c.bench_functions(&format!("{}/16", $name), funcs, STRINGS_16); + }; +} + +fn construct_from_str(c: &mut Criterion) { + macro_rules! cfs { + ($r:ty) => { + |b: &mut Bencher, strings: &&[&str]| { + b.iter(|| { + for s in *strings { + let _: $r = black_box(s.parse().unwrap()); + } + }) + } + }; + }; + + bench_block!(c, "construct_from_str", cfs); +} + +fn construct_from_bytes(c: &mut Criterion) { + macro_rules! cfu { + ($r:ty) => { + |b, inputs: &&[&str]| { + let raw: Vec<&[u8]> = inputs.iter().map(|s| s.as_bytes()).collect(); + b.iter(move || { + for u in &raw { + let _ = black_box(<$r>::from_bytes(*u).unwrap()); + } + }) + } + }; + }; + + let funcs = vec![ + Fun::new("TinyStr4", cfu!(TinyStr4)), + Fun::new("TinyStr8", cfu!(TinyStr8)), + Fun::new("TinyStr16", cfu!(TinyStr16)), + ]; + + c.bench_functions("construct_from_bytes/4", funcs, STRINGS_4); + + let funcs = vec![ + Fun::new("TinyStr8", cfu!(TinyStr8)), + Fun::new("TinyStr16", cfu!(TinyStr16)), + ]; + + c.bench_functions("construct_from_bytes/8", funcs, STRINGS_8); + + let funcs = vec![Fun::new("TinyStr16", cfu!(TinyStr16))]; + + c.bench_functions("construct_from_bytes/16", funcs, STRINGS_16); +} + +fn construct_unchecked(c: &mut Criterion) { + macro_rules! 
cu { + ($tty:ty, $rty:ty) => { + |b, inputs: &&[&str]| { + let raw: Vec<$rty> = inputs + .iter() + .map(|s| s.parse::<$tty>().unwrap().into()) + .collect(); + b.iter(move || { + for num in &raw { + let _ = unsafe { <$tty>::new_unchecked(black_box(*num)) }; + } + }) + } + }; + }; + + let funcs = vec![Fun::new("TinyStr4", cu!(TinyStr4, u32))]; + + c.bench_functions("construct_unchecked/4", funcs, STRINGS_4); + + let funcs = vec![Fun::new("TinyStr8", cu!(TinyStr8, u64))]; + + c.bench_functions("construct_unchecked/8", funcs, STRINGS_8); + + let funcs = vec![Fun::new("TinyStr16", cu!(TinyStr16, u128))]; + + c.bench_functions("construct_unchecked/16", funcs, STRINGS_16); +} + +criterion_group!( + benches, + construct_from_str, + construct_from_bytes, + construct_unchecked, +); +criterion_main!(benches); diff --git a/benches/tinystr.rs b/benches/tinystr.rs index 5690a37..83b26a3 100644 --- a/benches/tinystr.rs +++ b/benches/tinystr.rs @@ -62,52 +62,6 @@ macro_rules! bench_block { }; } -fn construct_from_str(c: &mut Criterion) { - macro_rules! cfs { - ($r:ty) => { - |b: &mut Bencher, strings: &&[&str]| { - b.iter(|| { - for s in *strings { - let _: $r = black_box(s.parse().unwrap()); - } - }) - } - }; - }; - - bench_block!(c, "construct_from_str", cfs); -} - -fn construct_unchecked(c: &mut Criterion) { - macro_rules! 
cu { - ($tty:ty, $rty:ty) => { - |b, inputs: &&[&str]| { - let raw: Vec<$rty> = inputs - .iter() - .map(|s| s.parse::<$tty>().unwrap().into()) - .collect(); - b.iter(move || { - for num in &raw { - let _ = unsafe { <$tty>::new_unchecked(black_box(*num)) }; - } - }) - } - }; - }; - - let funcs = vec![Fun::new("TinyStr4", cu!(TinyStr4, u32))]; - - c.bench_functions("construct_unchecked/4", funcs, STRINGS_4); - - let funcs = vec![Fun::new("TinyStr8", cu!(TinyStr8, u64))]; - - c.bench_functions("construct_unchecked/8", funcs, STRINGS_8); - - let funcs = vec![Fun::new("TinyStr16", cu!(TinyStr16, u128))]; - - c.bench_functions("construct_unchecked/16", funcs, STRINGS_16); -} - macro_rules! convert_to_ascii { ($ty:ty, $action:ident) => { |b: &mut Bencher, inputs: &&[&str]| { @@ -213,8 +167,6 @@ fn test_eq(c: &mut Criterion) { criterion_group!( benches, - construct_from_str, - construct_unchecked, convert_to_ascii_lowercase, convert_to_ascii_uppercase, convert_to_ascii_titlecase, diff --git a/src/helpers.rs b/src/helpers.rs index c9a9b8d..9b5be84 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -4,15 +4,15 @@ use std::ptr::copy_nonoverlapping; use super::Error; #[inline(always)] -pub(crate) unsafe fn make_4byte_str( - text: &str, +pub(crate) unsafe fn make_4byte_bytes( + bytes: &[u8], len: usize, mask: u32, ) -> Result<NonZeroU32, Error> { // Mask is always supplied as little-endian. let mask = u32::from_le(mask); let mut word: u32 = 0; - copy_nonoverlapping(text.as_ptr(), &mut word as *mut u32 as *mut u8, len); + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u32 as *mut u8, len); if (word & mask) != 0 { return Err(Error::NonAscii); } diff --git a/src/tinystr16.rs b/src/tinystr16.rs index 821a7ab..c089516 100644 --- a/src/tinystr16.rs +++ b/src/tinystr16.rs @@ -25,6 +25,41 @@ use crate::Error; pub struct TinyStr16(NonZeroU128); impl TinyStr16 { + /// Creates a TinyStr16 from a byte slice. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr16; + /// + /// let s1 = TinyStr16::from_bytes("Testing".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Testing"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + let len = bytes.len(); + if len < 1 || len > 16 { + return Err(Error::InvalidSize); + } + unsafe { + let mut word: u128 = 0; + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u128 as *mut u8, len); + let mask = 0x80808080_80808080_80808080_80808080u128 >> (8 * (16 - len)); + // TODO: could do this with #cfg(target_endian), but this is clearer and + // more confidence-inspiring. + let mask = u128::from_le(mask); + if (word & mask) != 0 { + return Err(Error::NonAscii); + } + if ((mask - word) & mask) != 0 { + return Err(Error::InvalidNull); + } + Ok(Self(NonZeroU128::new_unchecked(word))) + } + } + /// An unsafe constructor intended for cases where the consumer /// guarantees that the input is a little endian integer which /// is a correct representation of a `TinyStr16` string. @@ -275,25 +310,7 @@ impl FromStr for TinyStr16 { #[inline(always)] fn from_str(text: &str) -> Result<Self, Self::Err> { - let len = text.len(); - if len < 1 || len > 16 { - return Err(Error::InvalidSize); - } - unsafe { - let mut word: u128 = 0; - copy_nonoverlapping(text.as_ptr(), &mut word as *mut u128 as *mut u8, len); - let mask = 0x80808080_80808080_80808080_80808080u128 >> (8 * (16 - len)); - // TODO: could do this with #cfg(target_endian), but this is clearer and - // more confidence-inspiring. 
- let mask = u128::from_le(mask); - if (word & mask) != 0 { - return Err(Error::NonAscii); - } - if ((mask - word) & mask) != 0 { - return Err(Error::InvalidNull); - } - Ok(Self(NonZeroU128::new_unchecked(word))) - } + Self::from_bytes(text.as_bytes()) } } diff --git a/src/tinystr4.rs b/src/tinystr4.rs index 2f7cb52..b454f3f 100644 --- a/src/tinystr4.rs +++ b/src/tinystr4.rs @@ -5,7 +5,7 @@ use std::num::NonZeroU32; use std::ops::Deref; use std::str::FromStr; -use crate::helpers::make_4byte_str; +use crate::helpers::make_4byte_bytes; use crate::Error; /// A tiny string that is from 1 to 4 non-NUL ASCII characters. @@ -25,6 +25,31 @@ use crate::Error; pub struct TinyStr4(NonZeroU32); impl TinyStr4 { + /// Creates a TinyStr4 from a byte slice. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr4; + /// + /// let s1 = TinyStr4::from_bytes("Test".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Test"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + unsafe { + match bytes.len() { + 1 => make_4byte_bytes(bytes, 1, 0x80).map(Self), + 2 => make_4byte_bytes(bytes, 2, 0x8080).map(Self), + 3 => make_4byte_bytes(bytes, 3, 0x0080_8080).map(Self), + 4 => make_4byte_bytes(bytes, 4, 0x8080_8080).map(Self), + _ => Err(Error::InvalidSize), + } + } + } + /// An unsafe constructor intended for cases where the consumer /// guarantees that the input is a little endian integer which /// is a correct representation of a `TinyStr4` string. 
@@ -257,15 +282,7 @@ impl FromStr for TinyStr4 { #[inline(always)] fn from_str(text: &str) -> Result<Self, Self::Err> { - unsafe { - match text.len() { - 1 => make_4byte_str(text, 1, 0x80).map(Self), - 2 => make_4byte_str(text, 2, 0x8080).map(Self), - 3 => make_4byte_str(text, 3, 0x0080_8080).map(Self), - 4 => make_4byte_str(text, 4, 0x8080_8080).map(Self), - _ => Err(Error::InvalidSize), - } - } + Self::from_bytes(text.as_bytes()) } } diff --git a/src/tinystr8.rs b/src/tinystr8.rs index 753bb96..450765b 100644 --- a/src/tinystr8.rs +++ b/src/tinystr8.rs @@ -25,6 +25,41 @@ use crate::Error; pub struct TinyStr8(NonZeroU64); impl TinyStr8 { + /// Creates a TinyStr8 from a byte slice. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyStr8; + /// + /// let s1 = TinyStr8::from_bytes("Testing".as_bytes()) + /// .expect("Failed to parse."); + /// + /// assert_eq!(s1, "Testing"); + /// ``` + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { + let len = bytes.len(); + if len < 1 || len > 8 { + return Err(Error::InvalidSize); + } + unsafe { + let mut word: u64 = 0; + copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u64 as *mut u8, len); + let mask = 0x80808080_80808080u64 >> (8 * (8 - len)); + // TODO: could do this with #cfg(target_endian), but this is clearer and + // more confidence-inspiring. + let mask = u64::from_le(mask); + if (word & mask) != 0 { + return Err(Error::NonAscii); + } + if ((mask - word) & mask) != 0 { + return Err(Error::InvalidNull); + } + Ok(Self(NonZeroU64::new_unchecked(word))) + } + } + /// An unsafe constructor intended for cases where the consumer /// guarantees that the input is a little endian integer which /// is a correct representation of a `TinyStr8` string. 
@@ -267,25 +302,7 @@ impl FromStr for TinyStr8 { #[inline(always)] fn from_str(text: &str) -> Result<Self, Self::Err> { - let len = text.len(); - if len < 1 || len > 8 { - return Err(Error::InvalidSize); - } - unsafe { - let mut word: u64 = 0; - copy_nonoverlapping(text.as_ptr(), &mut word as *mut u64 as *mut u8, len); - let mask = 0x80808080_80808080u64 >> (8 * (8 - len)); - // TODO: could do this with #cfg(target_endian), but this is clearer and - // more confidence-inspiring. - let mask = u64::from_le(mask); - if (word & mask) != 0 { - return Err(Error::NonAscii); - } - if ((mask - word) & mask) != 0 { - return Err(Error::InvalidNull); - } - Ok(Self(NonZeroU64::new_unchecked(word))) - } + TinyStr8::from_bytes(text.as_bytes()) } } diff --git a/tests/main.rs b/tests/main.rs index 3eea61b..a3fb78b 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -8,6 +8,19 @@ fn tiny4_basic() { assert_eq!(s.deref(), "abc"); } +#[test] +fn tiny4_from_bytes() { + let s = TinyStr4::from_bytes("abc".as_bytes()).unwrap(); + assert_eq!(s.deref(), "abc"); + + assert_eq!( + TinyStr4::from_bytes(&[0, 159, 146, 150]), + Err(Error::NonAscii) + ); + assert_eq!(TinyStr4::from_bytes(&[]), Err(Error::InvalidSize)); + assert_eq!(TinyStr4::from_bytes(&[0]), Err(Error::InvalidNull)); +} + #[test] fn tiny4_size() { assert_eq!("".parse::<TinyStr4>(), Err(Error::InvalidSize)); @@ -144,6 +157,19 @@ fn tiny8_basic() { assert_eq!(s.deref(), "abcde"); } +#[test] +fn tiny8_from_bytes() { + let s = TinyStr8::from_bytes("abcde".as_bytes()).unwrap(); + assert_eq!(s.deref(), "abcde"); + + assert_eq!( + TinyStr8::from_bytes(&[0, 159, 146, 150]), + Err(Error::NonAscii) + ); + assert_eq!(TinyStr8::from_bytes(&[]), Err(Error::InvalidSize)); + assert_eq!(TinyStr8::from_bytes(&[0]), Err(Error::InvalidNull)); +} + #[test] fn tiny8_size() { assert_eq!("".parse::<TinyStr8>(), Err(Error::InvalidSize)); @@ -283,6 +309,19 @@ fn tiny8_debug() { assert_eq!(format!("{:#?}", s), "\"abcdef\""); } +#[test] +fn tiny16_from_bytes() { + let s = 
TinyStr16::from_bytes("abcdefghijk".as_bytes()).unwrap(); + assert_eq!(s.deref(), "abcdefghijk"); + + assert_eq!( + TinyStr16::from_bytes(&[0, 159, 146, 150]), + Err(Error::NonAscii) + ); + assert_eq!(TinyStr16::from_bytes(&[]), Err(Error::InvalidSize)); + assert_eq!(TinyStr16::from_bytes(&[0]), Err(Error::InvalidNull)); +} + #[test] fn tiny16_size() { assert_eq!("".parse::<TinyStr16>(), Err(Error::InvalidSize));