From 6bc772cdc0a9021a6286ebd550c2f6221179f21d Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 6 Jan 2021 16:34:13 -0500 Subject: [PATCH 1/8] Re-stabilize Weak::as_ptr &friends for unsized T As per T-lang consensus, this uses a branch to handle the dangling case. The discussed optimization of only doing the branch in the T: ?Sized case is left for a followup patch, as doing so is not trivial (as it requires specialization for correctness, not just optimization). --- library/alloc/src/lib.rs | 1 + library/alloc/src/rc.rs | 41 ++++++++++++++++++--------------- library/alloc/src/rc/tests.rs | 41 +++++++++++++++++++++++++++++++++ library/alloc/src/sync.rs | 41 +++++++++++++++++++-------------- library/alloc/src/sync/tests.rs | 41 +++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 35 deletions(-) diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index cfad111aa546f..d0bfa038aa13d 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -120,6 +120,7 @@ #![feature(receiver_trait)] #![cfg_attr(bootstrap, feature(min_const_generics))] #![feature(min_specialization)] +#![feature(set_ptr_value)] #![feature(slice_ptr_get)] #![feature(slice_ptr_len)] #![feature(staged_api)] diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 8183a582d337a..48b9b8a34f1da 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -1862,7 +1862,7 @@ struct WeakInner<'a> { strong: &'a Cell, } -impl Weak { +impl Weak { /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, @@ -1892,15 +1892,17 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut RcBox = NonNull::as_ptr(self.ptr); - // SAFETY: we must offset the pointer manually, and said pointer may be - // a dangling weak (usize::MAX) if T is sized. data_offset is safe to call, - // because we know that a pointer to unsized T was derived from a real - // unsized T, as dangling weaks are only created for sized T. wrapping_offset - // is used so that we can use the same code path for the non-dangling - // unsized case and the potentially dangling sized case. - unsafe { - let offset = data_offset(ptr as *mut T); - set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset)) + if is_dangling(self.ptr) { + // If the pointer is dangling, we return a null pointer as the dangling sentinel. + // We can't return the usize::MAX sentinel, as that could valid if T is ZST. + // SAFETY: we have to return a known sentinel here that cannot be produced for + // a valid pointer, so that `from_raw` can reverse this transformation. + (ptr as *mut T).set_ptr_value(ptr::null_mut()) + } else { + // SAFETY: If the pointer is not dangling, it describes to a valid allocation. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { &raw mut (*ptr).value } } } @@ -1982,22 +1984,25 @@ impl Weak { /// [`new`]: Weak::new #[stable(feature = "weak_into_raw", since = "1.45.0")] pub unsafe fn from_raw(ptr: *const T) -> Self { - // SAFETY: data_offset is safe to call, because this pointer originates from a Weak. // See Weak::as_ptr for context on how the input pointer is derived. - let offset = unsafe { data_offset(ptr) }; - // Reverse the offset to find the original RcBox. - // SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized). - let ptr = unsafe { - set_data_ptr(ptr as *mut RcBox, (ptr as *mut u8).wrapping_offset(-offset)) + let ptr = if ptr.is_null() { + // If we get a null pointer, this is a dangling weak. + // SAFETY: this is the same sentinel as used in Weak::new and is_dangling + (ptr as *mut RcBox).set_ptr_value(usize::MAX as *mut _) + } else { + // Otherwise, this describes a real allocation. + // SAFETY: data_offset is safe to call, as ptr describes a real allocation. + let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) } }; // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } } -} -impl Weak { /// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying /// dropping of the inner value if successful. /// diff --git a/library/alloc/src/rc/tests.rs b/library/alloc/src/rc/tests.rs index 2d183a8c88c64..843a9b07fa934 100644 --- a/library/alloc/src/rc/tests.rs +++ b/library/alloc/src/rc/tests.rs @@ -208,6 +208,30 @@ fn into_from_weak_raw() { } } +#[test] +fn test_into_from_weak_raw_unsized() { + use std::fmt::Display; + use std::string::ToString; + + let arc: Rc = Rc::from("foo"); + let weak: Weak = Rc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }, "foo"); + assert!(weak.ptr_eq(&weak2)); + + let arc: Rc = Rc::new(123); + let weak: Weak = Rc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }.to_string(), "123"); + assert!(weak.ptr_eq(&weak2)); +} + #[test] fn get_mut() { let mut x = Rc::new(3); @@ -294,6 +318,23 @@ fn test_unsized() { assert_eq!(foo, foo.clone()); } +#[test] +fn test_maybe_thin_unsized() { + // If/when custom thin DSTs exist, this test should be updated to use one + use std::ffi::{CStr, CString}; + + let x: Rc = Rc::from(CString::new("swordfish").unwrap().into_boxed_c_str()); + assert_eq!(format!("{:?}", x), "\"swordfish\""); + let y: Weak = Rc::downgrade(&x); + drop(x); + + // At this point, the weak points to a dropped DST + assert!(y.upgrade().is_none()); + // But we still need to be able to get the alloc layout to drop. + // CStr has no drop glue, but custom DSTs might, and need to work. + drop(y); +} + #[test] fn test_from_owned() { let foo = 123; diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 06ad621727166..ae61f4a2384b6 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1648,7 +1648,7 @@ struct WeakInner<'a> { strong: &'a atomic::AtomicUsize, } -impl Weak { +impl Weak { /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, @@ -1678,15 +1678,17 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut ArcInner = NonNull::as_ptr(self.ptr); - // SAFETY: we must offset the pointer manually, and said pointer may be - // a dangling weak (usize::MAX) if T is sized. data_offset is safe to call, - // because we know that a pointer to unsized T was derived from a real - // unsized T, as dangling weaks are only created for sized T. wrapping_offset - // is used so that we can use the same code path for the non-dangling - // unsized case and the potentially dangling sized case. - unsafe { - let offset = data_offset(ptr as *mut T); - set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset)) + if is_dangling(self.ptr) { + // If the pointer is dangling, we return a null pointer as the dangling sentinel. + // We can't return the usize::MAX sentinel, as that could valid if T is ZST. + // SAFETY: we have to return a known sentinel here that cannot be produced for + // a valid pointer, so that `from_raw` can reverse this transformation. + (ptr as *mut T).set_ptr_value(ptr::null_mut()) + } else { + // SAFETY: If the pointer is not dangling, it describes to a valid allocation. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { &raw mut (*ptr).data } } } @@ -1768,18 +1770,23 @@ impl Weak { /// [`forget`]: std::mem::forget #[stable(feature = "weak_into_raw", since = "1.45.0")] pub unsafe fn from_raw(ptr: *const T) -> Self { - // SAFETY: data_offset is safe to call, because this pointer originates from a Weak. // See Weak::as_ptr for context on how the input pointer is derived. - let offset = unsafe { data_offset(ptr) }; - // Reverse the offset to find the original ArcInner. - // SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized) - let ptr = unsafe { - set_data_ptr(ptr as *mut ArcInner, (ptr as *mut u8).wrapping_offset(-offset)) + let ptr = if ptr.is_null() { + // If we get a null pointer, this is a dangling weak. + // SAFETY: this is the same sentinel as used in Weak::new and is_dangling + (ptr as *mut ArcInner).set_ptr_value(usize::MAX as *mut _) + } else { + // Otherwise, this describes a real allocation. + // SAFETY: data_offset is safe to call, as ptr describes a real allocation. + let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)) } }; // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. - unsafe { Weak { ptr: NonNull::new_unchecked(ptr) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } } } diff --git a/library/alloc/src/sync/tests.rs b/library/alloc/src/sync/tests.rs index e8e1e66da5ed4..ce5c32ed8c5f3 100644 --- a/library/alloc/src/sync/tests.rs +++ b/library/alloc/src/sync/tests.rs @@ -158,6 +158,30 @@ fn into_from_weak_raw() { } } +#[test] +fn test_into_from_weak_raw_unsized() { + use std::fmt::Display; + use std::string::ToString; + + let arc: Arc = Arc::from("foo"); + let weak: Weak = Arc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }, "foo"); + assert!(weak.ptr_eq(&weak2)); + + let arc: Arc = Arc::new(123); + let weak: Weak = Arc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }.to_string(), "123"); + assert!(weak.ptr_eq(&weak2)); +} + #[test] fn test_cowarc_clone_make_mut() { let mut cow0 = Arc::new(75); @@ -329,6 +353,23 @@ fn test_unsized() { assert!(y.upgrade().is_none()); } +#[test] +fn test_maybe_thin_unsized() { + // If/when custom thin DSTs exist, this test should be updated to use one + use std::ffi::{CStr, CString}; + + let x: Arc = Arc::from(CString::new("swordfish").unwrap().into_boxed_c_str()); + assert_eq!(format!("{:?}", x), "\"swordfish\""); + let y: Weak = Arc::downgrade(&x); + drop(x); + + // At this point, the weak points to a dropped DST + assert!(y.upgrade().is_none()); + // But we still need to be able to get the alloc layout to drop. + // CStr has no drop glue, but custom DSTs might, and need to work. + drop(y); +} + #[test] fn test_from_owned() { let foo = 123; From f00b45890311da955bf2081a0fab2837f3a36a4d Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 7 Jan 2021 12:32:42 -0500 Subject: [PATCH 2/8] Tighten/clarify documentation of rc data_offset --- library/alloc/src/rc.rs | 7 ++----- library/alloc/src/sync.rs | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 48b9b8a34f1da..61d70a62dcaea 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2325,11 +2325,8 @@ impl Unpin for Rc {} /// /// # Safety /// -/// This has the same safety requirements as `align_of_val_raw`. In effect: -/// -/// - This function is safe for any argument if `T` is sized, and -/// - if `T` is unsized, the pointer must have appropriate pointer metadata -/// acquired from the real instance that you are getting this offset for. +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { // Align the unsized value to the end of the `RcBox`. // Because it is ?Sized, it will always be the last field in memory. diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index ae61f4a2384b6..1ca6b6d633590 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -2476,11 +2476,8 @@ impl Unpin for Arc {} /// /// # Safety /// -/// This has the same safety requirements as `align_of_val_raw`. In effect: -/// -/// - This function is safe for any argument if `T` is sized, and -/// - if `T` is unsized, the pointer must have appropriate pointer metadata -/// acquired from the real instance that you are getting this offset for. +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { // Align the unsized value to the end of the `ArcInner`. // Because it is `?Sized`, it will always be the last field in memory. From b10b9e25ffcb06077375960f07a0c6443184f07a Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 7 Jan 2021 12:41:58 -0500 Subject: [PATCH 3/8] Remove "pointer describes" terminology --- library/alloc/src/rc.rs | 9 ++++----- library/alloc/src/sync.rs | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 61d70a62dcaea..7f500af59a8df 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -1899,7 +1899,7 @@ impl Weak { // a valid pointer, so that `from_raw` can reverse this transformation. (ptr as *mut T).set_ptr_value(ptr::null_mut()) } else { - // SAFETY: If the pointer is not dangling, it describes to a valid allocation. + // SAFETY: If the pointer is not dangling, it references a valid allocation. // The payload may be dropped at this point, and we have to maintain provenance, // so use raw pointer manipulation. unsafe { &raw mut (*ptr).value } @@ -1991,8 +1991,8 @@ impl Weak { // SAFETY: this is the same sentinel as used in Weak::new and is_dangling (ptr as *mut RcBox).set_ptr_value(usize::MAX as *mut _) } else { - // Otherwise, this describes a real allocation. - // SAFETY: data_offset is safe to call, as ptr describes a real allocation. + // Otherwise, this references a real allocation. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. let offset = unsafe { data_offset(ptr) }; // Thus, we reverse the offset to get the whole RcBox. // SAFETY: the pointer originated from a Weak, so this offset is safe. @@ -2320,8 +2320,7 @@ impl AsRef for Rc { #[stable(feature = "pin", since = "1.33.0")] impl Unpin for Rc {} -/// Get the offset within an `RcBox` for -/// a payload of type described by a pointer. +/// Get the offset within an `RcBox` for the payload behind a pointer. /// /// # Safety /// diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 1ca6b6d633590..e2811a5cd6cee 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1685,7 +1685,7 @@ impl Weak { // a valid pointer, so that `from_raw` can reverse this transformation. (ptr as *mut T).set_ptr_value(ptr::null_mut()) } else { - // SAFETY: If the pointer is not dangling, it describes to a valid allocation. + // SAFETY: If the pointer is not dangling, it references a valid allocation. // The payload may be dropped at this point, and we have to maintain provenance, // so use raw pointer manipulation. unsafe { &raw mut (*ptr).data } @@ -1777,8 +1777,8 @@ impl Weak { // SAFETY: this is the same sentinel as used in Weak::new and is_dangling (ptr as *mut ArcInner).set_ptr_value(usize::MAX as *mut _) } else { - // Otherwise, this describes a real allocation. - // SAFETY: data_offset is safe to call, as ptr describes a real allocation. + // Otherwise, this references a real allocation. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. let offset = unsafe { data_offset(ptr) }; // Thus, we reverse the offset to get the whole RcBox. // SAFETY: the pointer originated from a Weak, so this offset is safe. @@ -2471,8 +2471,7 @@ impl AsRef for Arc { #[stable(feature = "pin", since = "1.33.0")] impl Unpin for Arc {} -/// Get the offset within an `ArcInner` for -/// a payload of type described by a pointer. +/// Get the offset within an `ArcInner` for the payload behind a pointer. /// /// # Safety /// From 1e578c9fb051ed3a4e608a43d840ad2877701333 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 7 Jan 2021 12:50:31 -0500 Subject: [PATCH 4/8] Reclarify Weak<->raw pointer safety comments --- library/alloc/src/rc.rs | 4 ++-- library/alloc/src/sync.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 7f500af59a8df..dd7710e5c051a 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -1899,7 +1899,7 @@ impl Weak { // a valid pointer, so that `from_raw` can reverse this transformation. (ptr as *mut T).set_ptr_value(ptr::null_mut()) } else { - // SAFETY: If the pointer is not dangling, it references a valid allocation. + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. // The payload may be dropped at this point, and we have to maintain provenance, // so use raw pointer manipulation. unsafe { &raw mut (*ptr).value } @@ -1991,7 +1991,7 @@ impl Weak { // SAFETY: this is the same sentinel as used in Weak::new and is_dangling (ptr as *mut RcBox).set_ptr_value(usize::MAX as *mut _) } else { - // Otherwise, this references a real allocation. + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. let offset = unsafe { data_offset(ptr) }; // Thus, we reverse the offset to get the whole RcBox. diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index e2811a5cd6cee..6241ee626f7f0 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1685,7 +1685,7 @@ impl Weak { // a valid pointer, so that `from_raw` can reverse this transformation. (ptr as *mut T).set_ptr_value(ptr::null_mut()) } else { - // SAFETY: If the pointer is not dangling, it references a valid allocation. + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. // The payload may be dropped at this point, and we have to maintain provenance, // so use raw pointer manipulation. unsafe { &raw mut (*ptr).data } @@ -1777,7 +1777,7 @@ impl Weak { // SAFETY: this is the same sentinel as used in Weak::new and is_dangling (ptr as *mut ArcInner).set_ptr_value(usize::MAX as *mut _) } else { - // Otherwise, this references a real allocation. + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. let offset = unsafe { data_offset(ptr) }; // Thus, we reverse the offset to get the whole RcBox. From 4901c55af7ec98231653cd0c3a0b1a6937eac743 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 7 Jan 2021 13:39:32 -0500 Subject: [PATCH 5/8] Replace set_data_ptr with pointer::set_ptr_value --- library/alloc/src/rc.rs | 19 +++---------------- library/alloc/src/sync.rs | 18 ++---------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index dd7710e5c051a..ed22571409c6d 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -827,8 +827,8 @@ impl Rc { let offset = unsafe { data_offset(ptr) }; // Reverse the offset to find the original RcBox. - let fake_ptr = ptr as *mut RcBox; - let rc_ptr = unsafe { set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)) }; + let rc_ptr = + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) }; unsafe { Self::from_ptr(rc_ptr) } } @@ -1154,7 +1154,7 @@ impl Rc { Self::allocate_for_layout( Layout::for_value(&*ptr), |layout| Global.allocate(layout), - |mem| set_data_ptr(ptr as *mut T, mem) as *mut RcBox, + |mem| (ptr as *mut RcBox).set_ptr_value(mem), ) } } @@ -1193,20 +1193,7 @@ impl Rc<[T]> { ) } } -} - -/// Sets the data pointer of a `?Sized` raw pointer. -/// -/// For a slice/trait object, this sets the `data` field and leaves the rest -/// unchanged. For a sized raw pointer, this simply sets the pointer. -unsafe fn set_data_ptr(mut ptr: *mut T, data: *mut U) -> *mut T { - unsafe { - ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8); - } - ptr -} -impl Rc<[T]> { /// Copy elements from slice into newly allocated Rc<\[T\]> /// /// Unsafe because the caller must either take ownership or bind `T: Copy` diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 6241ee626f7f0..aa6acdbff3419 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -844,8 +844,7 @@ impl Arc { let offset = data_offset(ptr); // Reverse the offset to find the original ArcInner. - let fake_ptr = ptr as *mut ArcInner; - let arc_ptr = set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)); + let arc_ptr = (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)); Self::from_ptr(arc_ptr) } @@ -1129,7 +1128,7 @@ impl Arc { Self::allocate_for_layout( Layout::for_value(&*ptr), |layout| Global.allocate(layout), - |mem| set_data_ptr(ptr as *mut T, mem) as *mut ArcInner, + |mem| (ptr as *mut ArcInner).set_ptr_value(mem) as *mut ArcInner, ) } } @@ -1168,20 +1167,7 @@ impl Arc<[T]> { ) } } -} - -/// Sets the data pointer of a `?Sized` raw pointer. -/// -/// For a slice/trait object, this sets the `data` field and leaves the rest -/// unchanged. For a sized raw pointer, this simply sets the pointer. -unsafe fn set_data_ptr(mut ptr: *mut T, data: *mut U) -> *mut T { - unsafe { - ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8); - } - ptr -} -impl Arc<[T]> { /// Copy elements from slice into newly allocated Arc<\[T\]> /// /// Unsafe because the caller must either take ownership or bind `T: Copy`. From 747dbcb3255399396ca16c8462c5a809423c7350 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 9 Jan 2021 14:32:55 -0500 Subject: [PATCH 6/8] Provide reasoning for rc data_offset safety --- library/alloc/src/rc.rs | 10 ++++++---- library/alloc/src/sync.rs | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index ed22571409c6d..b9f3f357c1a51 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2314,10 +2314,12 @@ impl Unpin for Rc {} /// The pointer must point to (and have valid metadata for) a previously /// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { - // Align the unsized value to the end of the `RcBox`. - // Because it is ?Sized, it will always be the last field in memory. - // Note: This is a detail of the current implementation of the compiler, - // and is not a guaranteed language detail. Do not rely on it outside of std. + // Align the unsized value to the end of the RcBox. + // Because RcBox is repr(C), it will always be the last field in memory. + // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std. unsafe { data_offset_align(align_of_val_raw(ptr)) } } diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index aa6acdbff3419..c50f5270a4d10 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -2464,10 +2464,12 @@ impl Unpin for Arc {} /// The pointer must point to (and have valid metadata for) a previously /// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { - // Align the unsized value to the end of the `ArcInner`. - // Because it is `?Sized`, it will always be the last field in memory. - // Note: This is a detail of the current implementation of the compiler, - // and is not a guaranteed language detail. Do not rely on it outside of std. + // Align the unsized value to the end of the ArcInner. + // Because RcBox is repr(C), it will always be the last field in memory. + // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std. unsafe { data_offset_align(align_of_val_raw(ptr)) } } From b5b6760c03867e2b16862324b4764cf35be8a1cd Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sun, 10 Jan 2021 23:27:32 -0500 Subject: [PATCH 7/8] Weak::into_raw shouldn't translate sentinel value --- library/alloc/src/rc.rs | 27 ++++++++++++--------------- library/alloc/src/sync.rs | 21 +++++++++------------ 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index b9f3f357c1a51..43affa396c9d7 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -848,7 +848,7 @@ impl Rc { pub fn downgrade(this: &Self) -> Weak { this.inner().inc_weak(); // Make sure we do not create a dangling Weak - debug_assert!(!is_dangling(this.ptr)); + debug_assert!(!is_dangling(this.ptr.as_ptr())); Weak { ptr: this.ptr } } @@ -1837,8 +1837,8 @@ impl Weak { } } -pub(crate) fn is_dangling(ptr: NonNull) -> bool { - let address = ptr.as_ptr() as *mut () as usize; +pub(crate) fn is_dangling(ptr: *mut T) -> bool { + let address = ptr as *mut () as usize; address == usize::MAX } @@ -1879,17 +1879,15 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut RcBox = NonNull::as_ptr(self.ptr); - if is_dangling(self.ptr) { - // If the pointer is dangling, we return a null pointer as the dangling sentinel. - // We can't return the usize::MAX sentinel, as that could valid if T is ZST. - // SAFETY: we have to return a known sentinel here that cannot be produced for - // a valid pointer, so that `from_raw` can reverse this transformation. - (ptr as *mut T).set_ptr_value(ptr::null_mut()) + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as it is at least as aligned as RcBox (usize). + ptr as *const T } else { // SAFETY: if is_dangling returns false, then the pointer is dereferencable. // The payload may be dropped at this point, and we have to maintain provenance, // so use raw pointer manipulation. - unsafe { &raw mut (*ptr).value } + unsafe { &raw const (*ptr).value } } } @@ -1973,10 +1971,9 @@ impl Weak { pub unsafe fn from_raw(ptr: *const T) -> Self { // See Weak::as_ptr for context on how the input pointer is derived. - let ptr = if ptr.is_null() { - // If we get a null pointer, this is a dangling weak. - // SAFETY: this is the same sentinel as used in Weak::new and is_dangling - (ptr as *mut RcBox).set_ptr_value(usize::MAX as *mut _) + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut RcBox } else { // Otherwise, we're guaranteed the pointer came from a nondangling Weak. // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. @@ -2052,7 +2049,7 @@ impl Weak { /// (i.e., when this `Weak` was created by `Weak::new`). #[inline] fn inner(&self) -> Option> { - if is_dangling(self.ptr) { + if is_dangling(self.ptr.as_ptr()) { None } else { // We are careful to *not* create a reference covering the "data" field, as diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index c50f5270a4d10..8917e2d4b400a 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -885,7 +885,7 @@ impl Arc { match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) { Ok(_) => { // Make sure we do not create a dangling Weak - debug_assert!(!is_dangling(this.ptr)); + debug_assert!(!is_dangling(this.ptr.as_ptr())); return Weak { ptr: this.ptr }; } Err(old) => cur = old, @@ -1664,12 +1664,10 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut ArcInner = NonNull::as_ptr(self.ptr); - if is_dangling(self.ptr) { - // If the pointer is dangling, we return a null pointer as the dangling sentinel. - // We can't return the usize::MAX sentinel, as that could valid if T is ZST. - // SAFETY: we have to return a known sentinel here that cannot be produced for - // a valid pointer, so that `from_raw` can reverse this transformation. - (ptr as *mut T).set_ptr_value(ptr::null_mut()) + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as it is at least as aligned as ArcInner (usize). + ptr as *const T } else { // SAFETY: if is_dangling returns false, then the pointer is dereferencable. // The payload may be dropped at this point, and we have to maintain provenance, @@ -1758,10 +1756,9 @@ impl Weak { pub unsafe fn from_raw(ptr: *const T) -> Self { // See Weak::as_ptr for context on how the input pointer is derived. - let ptr = if ptr.is_null() { - // If we get a null pointer, this is a dangling weak. - // SAFETY: this is the same sentinel as used in Weak::new and is_dangling - (ptr as *mut ArcInner).set_ptr_value(usize::MAX as *mut _) + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut ArcInner } else { // Otherwise, we're guaranteed the pointer came from a nondangling Weak. // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. @@ -1877,7 +1874,7 @@ impl Weak { /// (i.e., when this `Weak` was created by `Weak::new`). #[inline] fn inner(&self) -> Option> { - if is_dangling(self.ptr) { + if is_dangling(self.ptr.as_ptr()) { None } else { // We are careful to *not* create a reference covering the "data" field, as From c14e919f1e221dd0629bd88db6db77c52e03604e Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Wed, 13 Jan 2021 17:21:23 -0500 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Ralf Jung --- library/alloc/src/rc.rs | 2 +- library/alloc/src/sync.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 43affa396c9d7..3ca10480b34c8 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -1881,7 +1881,7 @@ impl Weak { if is_dangling(ptr) { // If the pointer is dangling, we return the sentinel directly. This cannot be - // a valid payload address, as it is at least as aligned as RcBox (usize). + // a valid payload address, as the payload is at least as aligned as RcBox (usize). ptr as *const T } else { // SAFETY: if is_dangling returns false, then the pointer is dereferencable. diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 8917e2d4b400a..306c0c91e4b25 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1666,7 +1666,7 @@ impl Weak { if is_dangling(ptr) { // If the pointer is dangling, we return the sentinel directly. This cannot be - // a valid payload address, as it is at least as aligned as ArcInner (usize). + // a valid payload address, as the payload is at least as aligned as ArcInner (usize). ptr as *const T } else { // SAFETY: if is_dangling returns false, then the pointer is dereferencable.