Skip to content

Commit

Permalink
add Byteview::from_reader
Browse files Browse the repository at this point in the history
  • Loading branch information
marvin-j97 committed Dec 14, 2024
1 parent a8700ef commit d1ab841
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 76 deletions.
94 changes: 87 additions & 7 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use byteview::ByteView;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::time::Duration;
use std::{
io::{Cursor, Read},
time::Duration,
};

fn cmp_short(c: &mut Criterion) {
let mut group = c.benchmark_group("cmp short");
Expand Down Expand Up @@ -198,22 +201,99 @@ fn eq_long(c: &mut Criterion) {
}
}

fn ctor(c: &mut Criterion) {
let mut group = c.benchmark_group("ctor long");
fn ctor_short(c: &mut Criterion) {
let mut group = c.benchmark_group("ctor short");

let value = b"abcdefabcdef";

group.bench_function("Arc'd slice", |b| {
b.iter(|| {
let _x: std::sync::Arc<[u8]> =
std::sync::Arc::from(nanoid::nanoid!().clone().as_bytes());
let _x = std::sync::Arc::from(value);
});
});

group.bench_function("ByteView", |b| {
b.iter(|| {
let _x = ByteView::from(nanoid::nanoid!());
let _x = ByteView::from(*value);
});
});
}

/// Benchmarks building a 36-byte value as an `Arc<[u8]>` versus a `ByteView`.
///
/// Both cases start from the same static byte string so the numbers are
/// directly comparable.
fn ctor_long(c: &mut Criterion) {
    // Named to line up with the sibling groups ("ctor short", "ctor long from reader").
    let mut group = c.benchmark_group("ctor long");

    let value = b"abcdefabcdefabcdefabcdefabcdefabcdef";

    group.bench_function("Arc'd slice", |b| {
        b.iter(|| {
            // `value` is a `&[u8; 36]`; without the annotation and the slice
            // conversion, `Arc::from(value)` resolves to `Arc<&[u8; 36]>`, which
            // only stores the reference and never copies the bytes.
            let _x: std::sync::Arc<[u8]> = std::sync::Arc::from(value.as_slice());
        });
    });

    group.bench_function("ByteView", |b| {
        b.iter(|| {
            let _x = ByteView::from(*value);
        });
    });
}

// Mimics `lsm-tree`-style deserialization of KV values: the payload length is
// known up front and the bytes are pulled out of a reader.
fn ctor_from_reader(c: &mut Criterion) {
    use std::sync::Arc;

    let payload = b"abcdefabcdefabcdefabcdefabcdefabcdef";
    let mut group = c.benchmark_group("ctor long from reader");

    // Fill a temporary `Vec`, then convert it into an `Arc<[u8]>`.
    group.bench_function("Arc'd slice", |bencher| {
        bencher.iter(|| {
            let mut reader = Cursor::new(payload);
            let mut buf = vec![0_u8; payload.len()];
            reader.read_exact(&mut buf).unwrap();
            let _x = Arc::<[u8]>::from(buf);
        });
    });

    // Create the zeroed `Arc<[u8]>` first and read straight into it.
    group.bench_function("Arc'd slice - preallocated", |bencher| {
        bencher.iter(|| {
            let mut reader = Cursor::new(payload);
            let mut shared: Arc<[u8]> = Arc::from(vec![0_u8; payload.len()]);

            // The `Arc` was just created and never cloned, so `get_mut` yields `Some`.
            let dst = Arc::get_mut(&mut shared).unwrap();
            reader.read_exact(dst).unwrap();
        });
    });

    // Zeroed `ByteView`, filled in place through the guard returned by `get_mut`.
    group.bench_function("ByteView::with_size", |bencher| {
        bencher.iter(|| {
            let mut reader = Cursor::new(payload);
            let mut view = ByteView::with_size(payload.len());

            // Declared after `view`, so the guard is dropped before it.
            let mut guard = view.get_mut().unwrap();
            reader.read_exact(&mut guard).unwrap();
        });
    });

    // The dedicated constructor under test.
    group.bench_function("ByteView::from_reader", |bencher| {
        bencher.iter(|| {
            let mut reader = Cursor::new(payload);
            let _x = ByteView::from_reader(&mut reader, payload.len()).unwrap();
        });
    });
}

criterion_group!(benches, eq_short, eq_long, cmp_short, cmp_long, ctor);
criterion_group!(
benches,
ctor_short,
ctor_long,
ctor_from_reader,
eq_short,
eq_long,
cmp_short,
cmp_long,
);
criterion_main!(benches);
95 changes: 28 additions & 67 deletions src/byteview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ impl std::hash::Hash for ByteView {

/// RAII guard for [`ByteView::get_mut`], so the prefix gets
/// updated properly when the mutation is done
pub struct Mutator<'a>(&'a mut ByteView);
pub struct Mutator<'a>(pub(crate) &'a mut ByteView);

impl<'a> std::ops::Deref for Mutator<'a> {
type Target = [u8];
Expand Down Expand Up @@ -253,6 +253,21 @@ impl ByteView {
}
}

/// Reads exactly `len` bytes from `reader` into a newly created byteview.
///
/// The view is allocated with [`ByteView::with_size`] and then filled in
/// place through a [`Mutator`] guard.
///
/// # Errors
///
/// Propagates any I/O error reported by [`std::io::Read::read_exact`],
/// including [`std::io::ErrorKind::UnexpectedEof`] when the reader ends
/// before `len` bytes were read.
pub fn from_reader<R: std::io::Read>(reader: &mut R, len: usize) -> std::io::Result<Self> {
    let mut view = Self::with_size(len);

    // The guard is a temporary: it is dropped at the end of this statement,
    // before `view` is handed back to the caller.
    reader.read_exact(&mut Mutator(&mut view))?;

    Ok(view)
}

/// Creates a new zeroed, fixed-length byteview.
///
/// Use [`ByteView::get_mut`] to mutate the content.
Expand Down Expand Up @@ -344,72 +359,6 @@ impl ByteView {
}

view

/* let slice_len = slice.len();
let Ok(len) = u32::try_from(slice_len) else {
panic!("byte slice too long");
};
let mut builder = Self {
trailer: Trailer {
short: ManuallyDrop::new(ShortRepr {
len,
data: [0; INLINE_SIZE],
}),
},
};
if builder.is_inline() {
// SAFETY: We check for inlinability
// so we know the input slice fits our buffer
unsafe {
let base_ptr = std::ptr::addr_of_mut!(builder) as *mut u8;
let prefix_offset = base_ptr.add(std::mem::size_of::<u32>());
std::ptr::copy_nonoverlapping(slice.as_ptr(), prefix_offset, slice_len);
}
} else {
unsafe {
(*builder.trailer.long)
.prefix
.copy_from_slice(&slice[0..PREFIX_SIZE]);
let header_size = std::mem::size_of::<HeapAllocationHeader>();
let alignment = std::mem::align_of::<HeapAllocationHeader>();
let total_size = header_size + slice_len;
let layout = std::alloc::Layout::from_size_align(total_size, alignment).unwrap();
let heap_ptr = std::alloc::alloc(layout);
if heap_ptr.is_null() {
std::alloc::handle_alloc_error(layout);
}
// SAFETY: We store a pointer to the copied slice, which comes directly after the header
(*builder.trailer.long).data =
heap_ptr.add(std::mem::size_of::<HeapAllocationHeader>());
// Copy byte slice into heap allocation
std::ptr::copy_nonoverlapping(
slice.as_ptr(),
(*builder.trailer.long).data.cast_mut(),
slice_len,
);
// Set pointer to heap allocation address
(*builder.trailer.long).heap = heap_ptr;
// Set ref count
let heap_region = heap_ptr as *const HeapAllocationHeader;
let heap_region = &*heap_region;
heap_region.ref_count.store(1, Ordering::Release);
}
}
debug_assert_eq!(slice, &*builder);
debug_assert_eq!(1, builder.ref_count());
debug_assert_eq!(builder.len(), slice.len());
builder */
}

fn get_heap_region(&self) -> &HeapAllocationHeader {
Expand Down Expand Up @@ -736,6 +685,7 @@ mod serde {
#[cfg(test)]
mod tests {
use super::{ByteView, HeapAllocationHeader};
use std::io::Cursor;

#[test]
#[cfg(target_pointer_width = "64")]
Expand All @@ -758,6 +708,17 @@ mod tests {
);
}

#[test]
fn from_reader_1() -> std::io::Result<()> {
    // Six bytes are available and exactly six are requested.
    let payload = b"abcdef";
    let mut reader = Cursor::new(payload);

    let view = ByteView::from_reader(&mut reader, 6)?;
    let bytes: &[u8] = &view;
    assert!(bytes == b"abcdef");

    Ok(())
}

#[test]
fn cmp_misc_1() {
let a = ByteView::from("abcdef");
Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
clippy::nursery,
clippy::expect_used,
clippy::unwrap_used,
clippy::indexing_slicing
clippy::indexing_slicing,
clippy::needless_lifetimes
)]

mod byteview;
Expand Down
29 changes: 28 additions & 1 deletion src/strview.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::ByteView;
use crate::{byteview::Mutator, ByteView};
use std::{ops::Deref, sync::Arc};

/// An immutable, UTF-8–encoded string slice
Expand Down Expand Up @@ -47,6 +47,21 @@ impl StrView {
Self(ByteView::new(s.as_bytes()))
}

/// Creates a new string and populates it with `len` bytes
/// from the given reader.
///
/// The bytes are checked to be valid UTF-8 before the string is returned,
/// so the result always upholds the UTF-8 guarantee of this type.
///
/// # Errors
///
/// Returns an error if an I/O error occurred (including
/// [`std::io::ErrorKind::UnexpectedEof`] when the reader ends before `len`
/// bytes were read), or an error of kind
/// [`std::io::ErrorKind::InvalidData`] if the bytes read are not valid UTF-8.
pub fn from_reader<R: std::io::Read>(reader: &mut R, len: usize) -> std::io::Result<Self> {
    let mut s = ByteView::with_size(len);
    {
        // Scoped so the guard is dropped before the bytes are inspected.
        let mut mutator = Mutator(&mut s);
        reader.read_exact(&mut mutator)?;
    }

    // A reader can yield arbitrary bytes; reject anything that is not UTF-8
    // instead of exposing it as a string.
    if let Err(e) = std::str::from_utf8(&s) {
        return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e));
    }

    Ok(Self(s))
}

/// Clones the contents of this string into a string.
#[must_use]
pub fn to_owned(&self) -> String {
Expand Down Expand Up @@ -176,6 +191,18 @@ mod serde {
#[cfg(test)]
mod tests {
use super::StrView;
use std::io::Cursor;

#[test]
fn from_reader_1() -> std::io::Result<()> {
    // Six UTF-8 bytes are available and exactly six are requested.
    let text = "abcdef";
    let mut reader = Cursor::new(text);

    let view = StrView::from_reader(&mut reader, 6)?;
    let contents: &str = &view;
    assert!(contents == "abcdef");

    Ok(())
}

#[test]
fn cmp_misc_1() {
Expand Down

0 comments on commit d1ab841

Please sign in to comment.