Skip to content

Commit

Permalink
Enable creating from [u8]
Browse files Browse the repository at this point in the history
  • Loading branch information
zbraniecki committed Oct 24, 2019
1 parent 7389731 commit e053fd6
Show file tree
Hide file tree
Showing 8 changed files with 296 additions and 99 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ categories = ["data-structures"]
[dev-dependencies]
criterion = "0.3"

[[bench]]
name = "construct"
harness = false

[[bench]]
name = "tinystr"
harness = false
151 changes: 151 additions & 0 deletions benches/construct.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Bencher;
use criterion::Criterion;
use criterion::Fun;

use tinystr::{TinyStr16, TinyStr4, TinyStr8};

/// Benchmark inputs that are each between 1 and 4 bytes long (used by the `/4` groups).
static STRINGS_4: &[&str] = &[
    "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK",
];

/// Benchmark inputs that are each between 1 and 8 bytes long (used by the `/8` groups).
static STRINGS_8: &[&str] = &[
    "Latn", "windows", "AR", "Hans", "macos",
    "AT", "pl", "FR", "en", "Cyrl",
    "SR", "NO", "419", "und", "UK",
];

/// Benchmark inputs that are each between 1 and 16 bytes long (used by the `/16` groups).
static STRINGS_16: &[&str] = &[
    "Latn", "windows", "AR", "Hans", "macos",
    "AT", "infiniband", "FR", "en", "Cyrl",
    "FromIntegral", "NO", "419", "MacintoshOSX2019", "UK",
];

/// Registers three benchmark groups, `<name>/4`, `<name>/8` and `<name>/16`.
///
/// * `$c` - the `Criterion` instance to register on.
/// * `$name` - prefix for the group names.
/// * `$action` - name of a macro that, given a type, expands to the benchmark
///   closure for that type.
///
/// Each group compares `String` against the `TinyStr*` types listed for it:
/// the `/8` group omits `TinyStr4`, and the `/16` group omits both `TinyStr4`
/// and `TinyStr8`.
macro_rules! bench_block {
    ($c:expr, $name:expr, $action:ident) => {
        $c.bench_functions(
            &format!("{}/4", $name),
            vec![
                Fun::new("String", $action!(String)),
                Fun::new("TinyStr4", $action!(TinyStr4)),
                Fun::new("TinyStr8", $action!(TinyStr8)),
                Fun::new("TinyStr16", $action!(TinyStr16)),
            ],
            STRINGS_4,
        );

        $c.bench_functions(
            &format!("{}/8", $name),
            vec![
                Fun::new("String", $action!(String)),
                Fun::new("TinyStr8", $action!(TinyStr8)),
                Fun::new("TinyStr16", $action!(TinyStr16)),
            ],
            STRINGS_8,
        );

        $c.bench_functions(
            &format!("{}/16", $name),
            vec![
                Fun::new("String", $action!(String)),
                Fun::new("TinyStr16", $action!(TinyStr16)),
            ],
            STRINGS_16,
        );
    };
}

fn construct_from_str(c: &mut Criterion) {
macro_rules! cfs {
($r:ty) => {
|b: &mut Bencher, strings: &&[&str]| {
b.iter(|| {
for s in *strings {
let _: $r = black_box(s.parse().unwrap());
}
})
}
};
};

bench_block!(c, "construct_from_str", cfs);
}

fn construct_from_bytes(c: &mut Criterion) {
macro_rules! cfu {
($r:ty) => {
|b, inputs: &&[&str]| {
let raw: Vec<&[u8]> = inputs.iter().map(|s| s.as_bytes()).collect();
b.iter(move || {
for u in &raw {
let _ = black_box(<$r>::from_bytes(*u).unwrap());
}
})
}
};
};

let funcs = vec![
Fun::new("TinyStr4", cfu!(TinyStr4)),
Fun::new("TinyStr8", cfu!(TinyStr8)),
Fun::new("TinyStr16", cfu!(TinyStr16)),
];

c.bench_functions("construct_from_bytes/4", funcs, STRINGS_4);

let funcs = vec![
Fun::new("TinyStr8", cfu!(TinyStr8)),
Fun::new("TinyStr16", cfu!(TinyStr16)),
];

c.bench_functions("construct_from_bytes/8", funcs, STRINGS_8);

let funcs = vec![Fun::new("TinyStr16", cfu!(TinyStr16))];

c.bench_functions("construct_from_bytes/16", funcs, STRINGS_16);
}

fn construct_unchecked(c: &mut Criterion) {
macro_rules! cu {
($tty:ty, $rty:ty) => {
|b, inputs: &&[&str]| {
let raw: Vec<$rty> = inputs
.iter()
.map(|s| s.parse::<$tty>().unwrap().into())
.collect();
b.iter(move || {
for num in &raw {
let _ = unsafe { <$tty>::new_unchecked(black_box(*num)) };
}
})
}
};
};

let funcs = vec![Fun::new("TinyStr4", cu!(TinyStr4, u32))];

c.bench_functions("construct_unchecked/4", funcs, STRINGS_4);

let funcs = vec![Fun::new("TinyStr8", cu!(TinyStr8, u64))];

c.bench_functions("construct_unchecked/8", funcs, STRINGS_8);

let funcs = vec![Fun::new("TinyStr16", cu!(TinyStr16, u128))];

c.bench_functions("construct_unchecked/16", funcs, STRINGS_16);
}

// Collect the three construction benchmarks into the `benches` group and
// generate the benchmark entry point from it.
criterion_group!(benches, construct_from_str, construct_from_bytes, construct_unchecked);
criterion_main!(benches);
48 changes: 0 additions & 48 deletions benches/tinystr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,52 +62,6 @@ macro_rules! bench_block {
};
}

fn construct_from_str(c: &mut Criterion) {
macro_rules! cfs {
($r:ty) => {
|b: &mut Bencher, strings: &&[&str]| {
b.iter(|| {
for s in *strings {
let _: $r = black_box(s.parse().unwrap());
}
})
}
};
};

bench_block!(c, "construct_from_str", cfs);
}

fn construct_unchecked(c: &mut Criterion) {
macro_rules! cu {
($tty:ty, $rty:ty) => {
|b, inputs: &&[&str]| {
let raw: Vec<$rty> = inputs
.iter()
.map(|s| s.parse::<$tty>().unwrap().into())
.collect();
b.iter(move || {
for num in &raw {
let _ = unsafe { <$tty>::new_unchecked(black_box(*num)) };
}
})
}
};
};

let funcs = vec![Fun::new("TinyStr4", cu!(TinyStr4, u32))];

c.bench_functions("construct_unchecked/4", funcs, STRINGS_4);

let funcs = vec![Fun::new("TinyStr8", cu!(TinyStr8, u64))];

c.bench_functions("construct_unchecked/8", funcs, STRINGS_8);

let funcs = vec![Fun::new("TinyStr16", cu!(TinyStr16, u128))];

c.bench_functions("construct_unchecked/16", funcs, STRINGS_16);
}

macro_rules! convert_to_ascii {
($ty:ty, $action:ident) => {
|b: &mut Bencher, inputs: &&[&str]| {
Expand Down Expand Up @@ -213,8 +167,6 @@ fn test_eq(c: &mut Criterion) {

criterion_group!(
benches,
construct_from_str,
construct_unchecked,
convert_to_ascii_lowercase,
convert_to_ascii_uppercase,
convert_to_ascii_titlecase,
Expand Down
6 changes: 3 additions & 3 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use std::ptr::copy_nonoverlapping;
use super::Error;

#[inline(always)]
pub(crate) unsafe fn make_4byte_str(
text: &str,
pub(crate) unsafe fn make_4byte_bytes(
bytes: &[u8],
len: usize,
mask: u32,
) -> Result<NonZeroU32, Error> {
// Mask is always supplied as little-endian.
let mask = u32::from_le(mask);
let mut word: u32 = 0;
copy_nonoverlapping(text.as_ptr(), &mut word as *mut u32 as *mut u8, len);
copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u32 as *mut u8, len);
if (word & mask) != 0 {
return Err(Error::NonAscii);
}
Expand Down
55 changes: 36 additions & 19 deletions src/tinystr16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,41 @@ use crate::Error;
pub struct TinyStr16(NonZeroU128);

impl TinyStr16 {
/// Creates a TinyStr16 from a byte slice.
///
/// The slice must hold between 1 and 16 bytes, each of them a non-NUL
/// ASCII value.
///
/// # Errors
///
/// * `Error::InvalidSize` if `bytes` is empty or longer than 16 bytes.
/// * `Error::NonAscii` if any byte has its high bit set.
/// * `Error::InvalidNull` if any byte is zero.
///
/// # Examples
///
/// ```
/// use tinystr::TinyStr16;
///
/// let s1 = TinyStr16::from_bytes("Testing".as_bytes())
///     .expect("Failed to parse.");
///
/// assert_eq!(s1, "Testing");
/// ```
#[inline(always)]
pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
    let len = bytes.len();
    if len == 0 || len > 16 {
        return Err(Error::InvalidSize);
    }

    // One 0x80 marker for each of the `len` occupied bytes.
    // TODO: could do this with #cfg(target_endian), but `from_le` is clearer
    // and more confidence-inspiring.
    let high_bits =
        u128::from_le(0x80808080_80808080_80808080_80808080u128 >> (8 * (16 - len)));

    let mut word: u128 = 0;
    // SAFETY: `len` is between 1 and 16, so the copy reads exactly the bytes
    // of `bytes` and writes within the 16 bytes of `word`; a slice and a
    // local variable cannot overlap.
    unsafe {
        copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u128 as *mut u8, len);
    }

    if (word & high_bits) != 0 {
        return Err(Error::NonAscii);
    }
    // Every occupied byte is now at most 0x7f, so the subtraction never
    // borrows across bytes; a marker bit survives only where the byte was 0.
    if ((high_bits - word) & high_bits) != 0 {
        return Err(Error::InvalidNull);
    }

    // SAFETY: at least one byte was copied and none of the copied bytes is
    // zero, so `word` is non-zero.
    Ok(Self(unsafe { NonZeroU128::new_unchecked(word) }))
}

/// An unsafe constructor intended for cases where the consumer
/// guarantees that the input is a little endian integer which
/// is a correct representation of a `TinyStr16` string.
Expand Down Expand Up @@ -275,25 +310,7 @@ impl FromStr for TinyStr16 {

/// Parses `text` by delegating to `from_bytes` on its UTF-8 bytes.
///
/// All size, ASCII and NUL validation lives in `from_bytes`, so the string
/// and byte-slice constructors share a single implementation.
#[inline(always)]
fn from_str(text: &str) -> Result<Self, Self::Err> {
    Self::from_bytes(text.as_bytes())
}
}

Expand Down
37 changes: 27 additions & 10 deletions src/tinystr4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::num::NonZeroU32;
use std::ops::Deref;
use std::str::FromStr;

use crate::helpers::make_4byte_str;
use crate::helpers::make_4byte_bytes;
use crate::Error;

/// A tiny string that is from 1 to 4 non-NUL ASCII characters.
Expand All @@ -25,6 +25,31 @@ use crate::Error;
pub struct TinyStr4(NonZeroU32);

impl TinyStr4 {
/// Creates a TinyStr4 from a byte slice.
///
/// # Errors
///
/// Returns `Error::InvalidSize` unless `bytes` holds 1 to 4 bytes. Otherwise
/// any error reported by `make_4byte_bytes` (for example `Error::NonAscii`)
/// is passed through unchanged.
///
/// # Examples
///
/// ```
/// use tinystr::TinyStr4;
///
/// let s1 = TinyStr4::from_bytes("Test".as_bytes())
///     .expect("Failed to parse.");
///
/// assert_eq!(s1, "Test");
/// ```
#[inline(always)]
pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
    // Each arm passes its length as a literal together with a mask carrying
    // one 0x80 marker per occupied byte.
    //
    // SAFETY (all arms): the length handed to `make_4byte_bytes` equals
    // `bytes.len()` and is at most 4. NOTE(review): this assumes the helper
    // only requires `len <= bytes.len()` and `len <= 4`; confirm against its
    // full body.
    let word = match bytes.len() {
        1 => unsafe { make_4byte_bytes(bytes, 1, 0x80) },
        2 => unsafe { make_4byte_bytes(bytes, 2, 0x8080) },
        3 => unsafe { make_4byte_bytes(bytes, 3, 0x0080_8080) },
        4 => unsafe { make_4byte_bytes(bytes, 4, 0x8080_8080) },
        _ => Err(Error::InvalidSize),
    };
    word.map(Self)
}

/// An unsafe constructor intended for cases where the consumer
/// guarantees that the input is a little endian integer which
/// is a correct representation of a `TinyStr4` string.
Expand Down Expand Up @@ -257,15 +282,7 @@ impl FromStr for TinyStr4 {

/// Parses `text` by delegating to `from_bytes` on its UTF-8 bytes.
///
/// All validation lives in `from_bytes`, so the string and byte-slice
/// constructors share a single implementation.
#[inline(always)]
fn from_str(text: &str) -> Result<Self, Self::Err> {
    Self::from_bytes(text.as_bytes())
}
}

Expand Down
Loading

0 comments on commit e053fd6

Please sign in to comment.