diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b8cb9a2..3ab0cf17 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ name: CI env: RUSTFLAGS: "-D warnings" PROPTEST_CASES: 10000 - MIRIFAGS: "-Zmiri-tag-raw-pointers -Zmiri-check-number-validity" + MIRIFLAGS: "-Zmiri-strict-provenance -Zmiri-check-number-validity" jobs: check: diff --git a/compact_str/src/repr/arc/inner.rs b/compact_str/src/repr/arc/inner.rs deleted file mode 100644 index eef6aee6..00000000 --- a/compact_str/src/repr/arc/inner.rs +++ /dev/null @@ -1,90 +0,0 @@ -use core::sync::atomic::AtomicUsize; -use core::{ - ptr, - slice, -}; -use std::alloc; - -const UNKNOWN: usize = 0; -pub type StrBuffer = [u8; UNKNOWN]; - -#[repr(C)] -pub struct ArcStringInner { - pub ref_count: AtomicUsize, - pub capacity: usize, - pub str_buffer: StrBuffer, -} - -impl ArcStringInner { - pub fn with_capacity(capacity: usize) -> ptr::NonNull { - let mut ptr = Self::alloc(capacity); - - // SAFETY: We just allocated an instance of `ArcStringInner` and checked to make sure it - // wasn't null, so we know it's aligned properly, that it points to an instance of - // `ArcStringInner` and that the "lifetime" is valid - unsafe { ptr.as_mut().ref_count = AtomicUsize::new(1) }; - // SAFTEY: Same as above - unsafe { ptr.as_mut().capacity = capacity }; - - ptr - } - - #[inline] - pub fn as_bytes(&self) -> &[u8] { - // SAFETY: Since we have an instance of `ArcStringInner` so we know the buffer is still - // valid, and we track the capacity with the creation and adjustment of the buffer - unsafe { slice::from_raw_parts(self.str_buffer.as_ptr(), self.capacity) } - } - - /// Returns a mutable reference to the underlying buffer of bytes - /// - /// # Invariants - /// * The caller must assert that no other references, or instances of `ArcString` exist before - /// calling this method. Otherwise multiple threads could race writing to the underlying buffer. - /// * The caller must assert that the underlying buffer is still valid UTF-8 - #[inline] - pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] { - // SAFETY: Since we have an instance of `ArcStringInner` so we know the buffer is still - // valid, and we track the capacity with the creation and adjustment of the buffer - // - // Note: In terms of mutability, it's up to the caller to assert the provided bytes are - // value UTF-8 - slice::from_raw_parts_mut(self.str_buffer.as_mut_ptr(), self.capacity) - } - - fn layout(capacity: usize) -> alloc::Layout { - let buffer_layout = alloc::Layout::array::(capacity).unwrap(); - alloc::Layout::new::() - .extend(buffer_layout) - .unwrap() - .0 - .pad_to_align() - } - - pub fn alloc(capacity: usize) -> ptr::NonNull { - let layout = Self::layout(capacity); - debug_assert!(layout.size() > 0); - - // SAFETY: `alloc(...)` has undefined behavior if the layout is zero-sized, but we know the - // size of the layout is greater than 0 because we define it (and check for it above) - let raw_ptr = unsafe { alloc::alloc(layout) as *mut ArcStringInner }; - - // Check to make sure our pointer is non-null, some allocators return null pointers instead - // of panicking - match ptr::NonNull::new(raw_ptr) { - Some(ptr) => ptr, - None => alloc::handle_alloc_error(layout), - } - } - - pub fn dealloc(ptr: ptr::NonNull) { - // SAFETY: We know the pointer is non-null and it is properly aligned - let capacity = unsafe { ptr.as_ref().capacity }; - let layout = Self::layout(capacity); - - // SAFETY: There is only one way to allocate an ArcStringInner, and it uses the same layout - // we defined above. Also we know the pointer is non-null and we use the same global - // allocator as we did in `Self::alloc(...)` - unsafe { alloc::dealloc(ptr.as_ptr() as *mut u8, layout) }; - } -} diff --git a/compact_str/src/repr/arc/mod.rs b/compact_str/src/repr/arc/mod.rs deleted file mode 100644 index 21aadd20..00000000 --- a/compact_str/src/repr/arc/mod.rs +++ /dev/null @@ -1,289 +0,0 @@ -// TODO: Use this module -#![allow(dead_code)] - -use std::iter::Extend; -use std::sync::atomic::Ordering; -use std::{ - fmt, - mem, - ptr, - str, -}; - -mod inner; -use inner::ArcStringInner; -mod writer; -use writer::ArcStringWriter; - -/// A soft limit on the amount of references that may be made to an `Arc`. -/// -/// Going above this limit will abort your program (although not -/// necessarily) at _exactly_ `MAX_REFCOUNT + 1` references. -const MAX_REFCOUNT: usize = (isize::MAX) as usize; - -#[repr(C)] -pub struct ArcString { - len: usize, - ptr: ptr::NonNull, -} -unsafe impl Sync for ArcString {} -unsafe impl Send for ArcString {} - -impl ArcString { - #[inline] - pub fn new(text: &str, additional: usize) -> Self { - let len = text.len(); - - let required = len + additional; - let amortized = 3 * len / 2; - let new_capacity = core::cmp::max(amortized, required); - - // TODO: Handle overflows in the case of __very__ large Strings - debug_assert!(new_capacity >= len); - - let mut ptr = ArcStringInner::with_capacity(new_capacity); - - // SAFETY: We just created the `ArcStringInner` so we know the pointer is properly aligned, - // it is non-null, points to an instance of `ArcStringInner`, and the `str_buffer` - // is valid - let buffer_ptr = unsafe { ptr.as_mut().str_buffer.as_mut_ptr() }; - // SAFETY: We know both `src` and `dest` are valid for respectively reads and writes of - // length `len` because `len` comes from `src`, and `dest` was allocated to be at least that - // length. We also know they're non-overlapping because `dest` is newly allocated - unsafe { buffer_ptr.copy_from_nonoverlapping(text.as_ptr(), len) }; - - ArcString { len, ptr } - } - - #[inline] - pub fn with_capacity(capacity: usize) -> Self { - // We should never be able to programatically create an `ArcString` with a capacity less - // than our max inline size, since then the string should be inlined - debug_assert!(capacity >= super::MAX_SIZE); - - let len = 0; - let ptr = ArcStringInner::with_capacity(capacity); - - ArcString { len, ptr } - } - - #[inline] - pub const fn len(&self) -> usize { - self.len - } - - #[inline] - pub fn capacity(&self) -> usize { - self.inner().capacity - } - - #[inline] - pub fn push(&mut self, ch: char) { - self.writer().push(ch); - } - - #[inline] - pub fn as_str(&self) -> &str { - // SAFETY: The only way you can construct an `ArcString` is via a `&str` so it must be valid - // UTF-8, or the caller has manually made those guarantees - unsafe { str::from_utf8_unchecked(self.as_slice()) } - } - - #[inline(always)] - pub fn as_slice(&self) -> &[u8] { - &self.inner().as_bytes()[..self.len] - } - - /// Returns a mutable reference to the underlying buffer of bytes - /// - /// # SAFETY: - /// * The caller must guarantee any modifications made to the buffer are valid UTF-8 - #[inline] - pub unsafe fn make_mut_slice(&mut self) -> &mut [u8] { - self.writer().into_mut_slice() - } - - #[inline] - pub unsafe fn set_len(&mut self, length: usize) { - self.len = length; - } - - #[inline] - fn writer(&mut self) -> ArcStringWriter<'_> { - ArcStringWriter::new(self) - } - - /// Returns a shared reference to the heap allocated `ArcStringInner` - #[inline] - fn inner(&self) -> &ArcStringInner { - // SAFETY: If we still have an instance of `ArcString` then we know the pointer to - // `ArcStringInner` is valid for at least as long as the provided ref to `self` - unsafe { self.ptr.as_ref() } - } - - #[inline(never)] - unsafe fn drop_inner(&mut self) { - ArcStringInner::dealloc(self.ptr) - } -} - -impl Clone for ArcString { - fn clone(&self) -> Self { - let old_count = self.inner().ref_count.fetch_add(1, Ordering::Relaxed); - assert!( - old_count < MAX_REFCOUNT, - "Program has gone wild, ref count > {}", - MAX_REFCOUNT - ); - - ArcString { - len: self.len, - ptr: self.ptr, - } - } -} - -impl Drop for ArcString { - fn drop(&mut self) { - // This was copied from the implementation of `std::sync::Arc` - // TODO: Better document the safety invariants here - if self.inner().ref_count.fetch_sub(1, Ordering::Release) != 1 { - return; - } - std::sync::atomic::fence(Ordering::Acquire); - unsafe { self.drop_inner() } - } -} - -impl fmt::Debug for ArcString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_str(), f) - } -} - -impl From<&str> for ArcString { - #[inline] - fn from(text: &str) -> Self { - ArcString::new(text, 0) - } -} - -impl Extend for ArcString { - #[inline] - fn extend>(&mut self, iter: T) { - self.writer().extend(iter); - } -} - -impl<'c> Extend<&'c char> for ArcString { - #[inline] - fn extend>(&mut self, iter: T) { - self.writer().extend(iter); - } -} - -impl<'s> Extend<&'s str> for ArcString { - #[inline] - fn extend>(&mut self, iter: T) { - self.writer().extend(iter); - } -} - -impl Extend> for ArcString { - #[inline] - fn extend>>(&mut self, iter: T) { - self.writer().extend(iter); - } -} - -impl Extend for ArcString { - #[inline] - fn extend>(&mut self, iter: T) { - self.writer().extend(iter); - } -} - -#[cfg(test)] -mod test { - use proptest::prelude::*; - use test_strategy::proptest; - - use super::ArcString; - use crate::tests::rand_unicode; - - #[test] - fn test_empty() { - let empty = ""; - let arc_str = ArcString::from(empty); - - assert_eq!(arc_str.as_str(), empty); - assert_eq!(arc_str.len, empty.len()); - } - - #[test] - fn test_long() { - let long = "aaabbbcccdddeeefff\n - ggghhhiiijjjkkklll\n - mmmnnnooopppqqqrrr\n - ssstttuuuvvvwwwxxx\n - yyyzzz000111222333\n - 444555666777888999000"; - let arc_str = ArcString::from(long); - - assert_eq!(arc_str.as_str(), long); - assert_eq!(arc_str.len, long.len()); - } - - #[test] - fn test_clone_and_drop() { - let example = "hello world!"; - let arc_str_1 = ArcString::from(example); - let arc_str_2 = arc_str_1.clone(); - - drop(arc_str_1); - - assert_eq!(arc_str_2.as_str(), example); - assert_eq!(arc_str_2.len, example.len()); - } - - #[test] - fn test_extend_chars() { - let example = "hello"; - let mut arc = ArcString::from(example); - - arc.extend(" world!".chars()); - - assert_eq!(arc.as_str(), "hello world!"); - assert_eq!(arc.len(), 12); - } - - #[test] - fn test_extend_strs() { - let example = "hello"; - let mut arc = ArcString::from(example); - - let words = vec![" ", "world!", "my name is", " compact", "_str"]; - arc.extend(words); - - assert_eq!(arc.as_str(), "hello world!my name is compact_str"); - assert_eq!(arc.len(), 34); - } - - #[test] - fn test_sanity() { - let example = "hello world!"; - let arc_str = ArcString::from(example); - - assert_eq!(arc_str.as_str(), example); - assert_eq!(arc_str.len, example.len()); - } - - #[proptest] - #[cfg_attr(miri, ignore)] - fn test_strings_roundtrip(#[strategy(rand_unicode())] word: String) { - let arc_str = ArcString::from(word.as_str()); - prop_assert_eq!(&word, arc_str.as_str()); - } -} - -crate::asserts::assert_size!(ArcString, 2 * mem::size_of::()); diff --git a/compact_str/src/repr/arc/writer.rs b/compact_str/src/repr/arc/writer.rs deleted file mode 100644 index 377386a0..00000000 --- a/compact_str/src/repr/arc/writer.rs +++ /dev/null @@ -1,156 +0,0 @@ -use core::iter::Extend; -use core::sync::atomic::Ordering; - -use super::ArcString; - -/// An `ArcStringWriter` provides safe mutable access to the underlying buffer of an `ArcString`. -/// -/// To create an `ArcStringWriter`, we first check to make sure we have a unique reference to the -/// underlying buffer of an `ArcString`, if we don't then we create one. Knowing we hold a unique -/// reference to the `ArcString` allows us to make multiple modifications to the buffer, this is -/// particularly beneficial for something like the `core::iter::Extend` trait -pub struct ArcStringWriter<'a> { - arc_string: &'a mut ArcString, -} - -impl<'a> ArcStringWriter<'a> { - #[inline] - pub fn new(arc_string: &'a mut ArcString) -> Self { - if arc_string - .inner() - .ref_count - .compare_exchange(1, 0, Ordering::Acquire, Ordering::Relaxed) - .is_err() - { - // There is more than one reference to this underlying buffer, so we need to make a new - // instance and decrement the count of the original by one - - // Make a new instance with the same capacity as self - let additional = arc_string.capacity() - arc_string.len(); - let new = ArcString::new(arc_string.as_str(), additional); - - // Assign arc_string to our new instsance, this drops the old ArcString, which - // decrements its ref count - *arc_string = new; - } else { - // We were the sole reference of either kind; bump back up the strong ref count. - arc_string.inner().ref_count.store(1, Ordering::Release); - } - - Self { arc_string } - } - - /// Reserve space for at least `additional` bytes - #[inline] - pub fn reserve(&mut self, additional: usize) { - // We need at least this much space - let new_capacity = self.arc_string.len() + additional; - - // We have enough space, so there is no work to do - if self.arc_string.capacity() >= new_capacity { - return; - } - - // Create a new `ArcString` with enough space for at least `additional` bytes, dropping the - // old one - *self.arc_string = ArcString::new(self.arc_string.as_str(), additional); - } - - #[inline] - pub fn push(&mut self, ch: char) { - let len = self.arc_string.len(); - let char_len = ch.len_utf8(); - - // Reserve at least enough space for the new char - self.reserve(char_len); - - // SAFETY: We're writing a char into the slice, which is valid UTF-8 - let slice = unsafe { self.as_mut_slice() }; - - // Write the char into the slice - ch.encode_utf8(&mut slice[len..]); - // Increment our length - // - // SAFETY: We just wrote `char_len` bytes into the buffer, so we know this new length is - // valid - unsafe { self.arc_string.set_len(len + char_len) }; - } - - #[inline] - pub fn push_str(&mut self, s: &str) { - let len = self.arc_string.len(); - let str_len = s.len(); - - // Reserve at least enough space for the new str - self.reserve(str_len); - - // SAFETY: We're writing a &str into the slice, which is valid UTF-8 - let slice = unsafe { self.as_mut_slice() }; - let buffer = &mut slice[len..len + str_len]; - - debug_assert_eq!(buffer.len(), s.as_bytes().len()); - - // Copy the string into our buffer - buffer.copy_from_slice(s.as_bytes()); - // Incrament the length of our string - unsafe { self.arc_string.set_len(len + str_len) }; - } - - /// Returns a reference to a mutable slice of bytes with lifetime `'a` - /// - /// # SAFETY - /// * Callers must guarantee that any modifications they make to the slice are valid UTF-8 - #[inline] - pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { - self.arc_string.ptr.as_mut().as_mut_bytes() - } - - /// Transforms the `ArcStringWriter<'a>` into a mutable slice of bytes with lifetime `'a` - /// - /// # SAFETY - /// * Callers must guarantee that any modifications they make to the slice are valid UTF-8 - #[inline] - pub unsafe fn into_mut_slice(self) -> &'a mut [u8] { - // SAFETY: If we still have an instance of `ArcString` then we know the pointer to - // `ArcStringInner` is valid for at least as long as the provided ref to `self` - self.arc_string.ptr.as_mut().as_mut_bytes() - } -} - -impl<'a> Extend for ArcStringWriter<'a> { - #[inline] - fn extend>(&mut self, iter: T) { - let iterator = iter.into_iter(); - let (lower_bound, _) = iterator.size_hint(); - self.reserve(lower_bound); - iterator.for_each(|c| self.push(c)); - } -} - -impl<'c, 'a> Extend<&'c char> for ArcStringWriter<'a> { - #[inline] - fn extend>(&mut self, iter: T) { - self.extend(iter.into_iter().copied()); - } -} - -impl<'s, 'a> Extend<&'s str> for ArcStringWriter<'a> { - #[inline] - fn extend>(&mut self, iter: T) { - iter.into_iter().for_each(|s| self.push_str(s)); - } -} - -impl<'a> Extend> for ArcStringWriter<'a> { - #[inline] - fn extend>>(&mut self, iter: T) { - iter.into_iter().for_each(move |s| self.push_str(&s)); - } -} - -impl<'a> Extend for ArcStringWriter<'a> { - #[inline] - fn extend>(&mut self, iter: T) { - iter.into_iter().for_each(move |s| self.push_str(&s)); - } -} diff --git a/compact_str/src/repr/boxed/mod.rs b/compact_str/src/repr/boxed/mod.rs index dfa3ba1f..e839540d 100644 --- a/compact_str/src/repr/boxed/mod.rs +++ b/compact_str/src/repr/boxed/mod.rs @@ -1,4 +1,5 @@ use core::iter::Extend; +use core::mem::ManuallyDrop; use core::{ fmt, ptr, @@ -137,12 +138,12 @@ impl BoxString { // Note: We should never hit this case when using BoxString with CompactString Ok(_) if s.capacity() == 0 => BoxString::new(""), Ok(cap) => { + // Don't drop `s` to avoid a double free + let mut s = ManuallyDrop::new(s.into_bytes()); let len = s.len(); - let raw_ptr = s.as_ptr() as *mut u8; + let raw_ptr = s.as_mut_ptr(); let ptr = ptr::NonNull::new(raw_ptr).expect("string with capacity has null ptr?"); - // "forget" `s` so we don't call Drop and deallocate the underlying buffer - core::mem::forget(s); // create a new BoxString with our parts! BoxString { len, ptr, cap } } @@ -157,11 +158,10 @@ impl BoxString { Ok(_) if b.len() == 0 => BoxString::new(""), Ok(cap) => { let len = b.len(); - let raw_ptr = b.as_ptr() as *mut u8; + // Don't drop the box here + let raw_ptr = Box::into_raw(b).cast::(); let ptr = ptr::NonNull::new(raw_ptr).expect("string with capacity has null ptr?"); - // "forget" `s` so we don't call Drop and deallocate the underlying buffer - core::mem::forget(b); // create a new BoxString with our parts! BoxString { len, ptr, cap } } diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index 1d8a15c7..9fdc44be 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -9,7 +9,6 @@ mod bytes; mod iter; -mod arc; mod boxed; mod discriminant; mod heap;