Skip to content

Commit

Permalink
Rollup merge of rust-lang#109179 - llogiq:intrinsically-option-as-sli…
Browse files Browse the repository at this point in the history
…ce, r=eholk

move Option::as_slice to intrinsic

``@scottmcm`` suggested on rust-lang#109095 I use a direct approach of unpacking the operation in MIR lowering, so here's the implementation.

cc ``@nikic`` as this should hopefully unblock rust-lang#107224 (though perhaps other changes to the prior implementation, which I left for bootstrapping, are needed).
  • Loading branch information
matthiaskrgr authored Mar 21, 2023
2 parents e72ad51 + 27e9ee9 commit 98bc283
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 72 deletions.
1 change: 1 addition & 0 deletions compiler/rustc_hir/src/lang_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ language_item_table! {
Context, sym::Context, context, Target::Struct, GenericRequirement::None;
FuturePoll, sym::poll, future_poll_fn, Target::Method(MethodKind::Trait { body: false }), GenericRequirement::None;

Option, sym::Option, option_type, Target::Enum, GenericRequirement::None;
OptionSome, sym::Some, option_some_variant, Target::Variant, GenericRequirement::None;
OptionNone, sym::None, option_none_variant, Target::Variant, GenericRequirement::None;

Expand Down
15 changes: 15 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,21 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
],
tcx.mk_ptr(ty::TypeAndMut { ty: param(0), mutbl: hir::Mutability::Not }),
),
sym::option_payload_ptr => {
let option_def_id = tcx.require_lang_item(hir::LangItem::Option, None);
let p0 = param(0);
(
1,
vec![tcx.mk_ptr(ty::TypeAndMut {
ty: tcx.mk_adt(
tcx.adt_def(option_def_id),
tcx.mk_substs_from_iter([ty::GenericArg::from(p0)].into_iter()),
),
mutbl: hir::Mutability::Not,
})],
tcx.mk_ptr(ty::TypeAndMut { ty: p0, mutbl: hir::Mutability::Not }),
)
}
sym::ptr_mask => (
1,
vec![
Expand Down
30 changes: 30 additions & 0 deletions compiler/rustc_mir_transform/src/lower_intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use rustc_middle::ty::subst::SubstsRef;
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
use rustc_target::abi::VariantIdx;

pub struct LowerIntrinsics;

Expand Down Expand Up @@ -191,6 +192,35 @@ impl<'tcx> MirPass<'tcx> for LowerIntrinsics {
terminator.kind = TerminatorKind::Goto { target };
}
}
sym::option_payload_ptr => {
if let (Some(target), Some(arg)) = (*target, args[0].place()) {
let ty::RawPtr(ty::TypeAndMut { ty: dest_ty, .. }) =
destination.ty(local_decls, tcx).ty.kind()
else { bug!(); };

block.statements.push(Statement {
source_info: terminator.source_info,
kind: StatementKind::Assign(Box::new((
*destination,
Rvalue::AddressOf(
Mutability::Not,
arg.project_deeper(
&[
PlaceElem::Deref,
PlaceElem::Downcast(
Some(sym::Some),
VariantIdx::from_u32(1),
),
PlaceElem::Field(Field::from_u32(0), *dest_ty),
],
tcx,
),
),
))),
});
terminator.kind = TerminatorKind::Goto { target };
}
}
_ if intrinsic_name.as_str().starts_with("simd_shuffle") => {
validate_simd_shuffle(tcx, args, terminator.source_info.span);
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,7 @@ symbols! {
optin_builtin_traits,
option,
option_env,
option_payload_ptr,
options,
or,
or_patterns,
Expand Down
6 changes: 6 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2214,6 +2214,12 @@ extern "rust-intrinsic" {
where
G: FnOnce<ARG, Output = RET>,
F: FnOnce<ARG, Output = RET>;

#[cfg(not(bootstrap))]
/// This method creates a pointer to any `Some` value. If the argument is
/// `None`, an invalid within-bounds pointer (that is still acceptable for
/// constructing an empty slice) is returned.
pub fn option_payload_ptr<T>(arg: *const Option<T>) -> *const T;
}

// Some functions are defined here because they accidentally got made
Expand Down
108 changes: 36 additions & 72 deletions library/core/src/option.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ use crate::{
/// The `Option` type. See [the module level documentation](self) for more.
#[derive(Copy, PartialOrd, Eq, Ord, Debug, Hash)]
#[rustc_diagnostic_item = "Option"]
#[cfg_attr(not(bootstrap), lang = "Option")]
#[stable(feature = "rust1", since = "1.0.0")]
pub enum Option<T> {
/// No value.
Expand Down Expand Up @@ -735,48 +736,6 @@ impl<T> Option<T> {
}
}

/// This is a guess at how many bytes into the option the payload can be found.
///
/// For niche-optimized types it's correct because it's pigeon-holed to only
/// one possible place. For other types, it's usually correct today, but
/// tweaks to the layout algorithm (particularly expansions of
/// `-Z randomize-layout`) might make it incorrect at any point.
///
/// It's guaranteed to be a multiple of alignment (so will always give a
/// correctly-aligned location) and to be within the allocated object, so
/// is valid to use with `offset` and to use for a zero-sized read.
///
/// FIXME: This is a horrible hack, but allows a nice optimization. It should
/// be replaced with `offset_of!` once that works on enum variants.
const SOME_BYTE_OFFSET_GUESS: isize = {
let some_uninit = Some(mem::MaybeUninit::<T>::uninit());
let payload_ref = some_uninit.as_ref().unwrap();
// SAFETY: `as_ref` gives an address inside the existing `Option`,
// so both pointers are derived from the same thing and the result
// cannot overflow an `isize`.
let offset = unsafe { <*const _>::byte_offset_from(payload_ref, &some_uninit) };

// The offset is into the object, so it's guaranteed to be non-negative.
assert!(offset >= 0);

// The payload and the overall option are aligned,
// so the offset will be a multiple of the alignment too.
assert!((offset as usize) % mem::align_of::<T>() == 0);

let max_offset = mem::size_of::<Self>() - mem::size_of::<T>();
if offset as usize <= max_offset {
// There's enough space after this offset for a `T` to exist without
// overflowing the bounds of the object, so let's try it.
offset
} else {
// The offset guess is definitely wrong, so use the address
// of the original option since we have it already.
// This also correctly handles the case of layout-optimized enums
// where `max_offset == 0` and thus this is the only possibility.
0
}
};

/// Returns a slice of the contained value, if any. If this is `None`, an
/// empty slice is returned. This can be useful to have a single type of
/// iterator over an `Option` or slice.
Expand Down Expand Up @@ -809,28 +768,29 @@ impl<T> Option<T> {
#[must_use]
#[unstable(feature = "option_as_slice", issue = "108545")]
pub fn as_slice(&self) -> &[T] {
let payload_ptr: *const T =
// The goal here is that both arms here are calculating exactly
// the same pointer, and thus it'll be folded away when the guessed
// offset is correct, but if the guess is wrong for some reason
// it'll at least still be sound, just no longer optimal.
if let Some(payload) = self {
payload
} else {
let self_ptr: *const Self = self;
// SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is
// such that this will be in-bounds of the object.
unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() }
};
let len = usize::from(self.is_some());
#[cfg(bootstrap)]
match self {
Some(value) => slice::from_ref(value),
None => &[],
}

#[cfg(not(bootstrap))]
// SAFETY: When the `Option` is `Some`, we're using the actual pointer
// to the payload, with a length of 1, so this is equivalent to
// `slice::from_ref`, and thus is safe.
// When the `Option` is `None`, the length used is 0, so to be safe it
// just needs to be aligned, which it is because `&self` is aligned and
// the offset used is a multiple of alignment.
unsafe { slice::from_raw_parts(payload_ptr, len) }
//
// In the new version, the intrinsic always returns a pointer to an
// in-bounds and correctly aligned position for a `T` (even if in the
// `None` case it's just padding).
unsafe {
slice::from_raw_parts(
crate::intrinsics::option_payload_ptr(crate::ptr::from_ref(self)),
usize::from(self.is_some()),
)
}
}

/// Returns a mutable slice of the contained value, if any. If this is
Expand Down Expand Up @@ -875,28 +835,32 @@ impl<T> Option<T> {
#[must_use]
#[unstable(feature = "option_as_slice", issue = "108545")]
pub fn as_mut_slice(&mut self) -> &mut [T] {
let payload_ptr: *mut T =
// The goal here is that both arms here are calculating exactly
// the same pointer, and thus it'll be folded away when the guessed
// offset is correct, but if the guess is wrong for some reason
// it'll at least still be sound, just no longer optimal.
if let Some(payload) = self {
payload
} else {
let self_ptr: *mut Self = self;
// SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is
// such that this will be in-bounds of the object.
unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() }
};
let len = usize::from(self.is_some());
#[cfg(bootstrap)]
match self {
Some(value) => slice::from_mut(value),
None => &mut [],
}

#[cfg(not(bootstrap))]
// SAFETY: When the `Option` is `Some`, we're using the actual pointer
// to the payload, with a length of 1, so this is equivalent to
// `slice::from_mut`, and thus is safe.
// When the `Option` is `None`, the length used is 0, so to be safe it
// just needs to be aligned, which it is because `&self` is aligned and
// the offset used is a multiple of alignment.
unsafe { slice::from_raw_parts_mut(payload_ptr, len) }
//
// In the new version, the intrinsic creates a `*const T` from a
// mutable reference so it is safe to cast back to a mutable pointer
// here. As with `as_slice`, the intrinsic always returns a pointer to
// an in-bounds and correctly aligned position for a `T` (even if in
// the `None` case it's just padding).
unsafe {
slice::from_raw_parts_mut(
crate::intrinsics::option_payload_ptr(crate::ptr::from_mut(self).cast_const())
.cast_mut(),
usize::from(self.is_some()),
)
}
}

/////////////////////////////////////////////////////////////////////////
Expand Down
54 changes: 54 additions & 0 deletions tests/mir-opt/lower_intrinsics.option_payload.LowerIntrinsics.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
- // MIR for `option_payload` before LowerIntrinsics
+ // MIR for `option_payload` after LowerIntrinsics

fn option_payload(_1: &Option<usize>, _2: &Option<String>) -> () {
debug o => _1; // in scope 0 at $DIR/lower_intrinsics.rs:+0:23: +0:24
debug p => _2; // in scope 0 at $DIR/lower_intrinsics.rs:+0:42: +0:43
let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:+0:62: +0:62
let mut _4: *const std::option::Option<usize>; // in scope 0 at $DIR/lower_intrinsics.rs:+2:55: +2:56
let mut _6: *const std::option::Option<std::string::String>; // in scope 0 at $DIR/lower_intrinsics.rs:+3:55: +3:56
scope 1 {
let _3: *const usize; // in scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15
scope 2 {
debug _x => _3; // in scope 2 at $DIR/lower_intrinsics.rs:+2:13: +2:15
let _5: *const std::string::String; // in scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15
scope 3 {
debug _y => _5; // in scope 3 at $DIR/lower_intrinsics.rs:+3:13: +3:15
}
}
}

bb0: {
StorageLive(_3); // scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15
StorageLive(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56
_4 = &raw const (*_1); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56
- _3 = option_payload_ptr::<usize>(move _4) -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:99:18: 99:54
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<usize>) -> *const usize {option_payload_ptr::<usize>}, val: Value(<ZST>) }
+ _3 = &raw const (((*_4) as Some).0: usize); // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
+ goto -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
}

bb1: {
StorageDead(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:56: +2:57
StorageLive(_5); // scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15
StorageLive(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56
_6 = &raw const (*_2); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56
- _5 = option_payload_ptr::<String>(move _6) -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:100:18: 100:54
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<String>) -> *const String {option_payload_ptr::<String>}, val: Value(<ZST>) }
+ _5 = &raw const (((*_6) as Some).0: std::string::String); // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
+ goto -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
}

bb2: {
StorageDead(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:56: +3:57
_0 = const (); // scope 1 at $DIR/lower_intrinsics.rs:+1:5: +4:6
StorageDead(_5); // scope 2 at $DIR/lower_intrinsics.rs:+4:5: +4:6
StorageDead(_3); // scope 1 at $DIR/lower_intrinsics.rs:+4:5: +4:6
return; // scope 0 at $DIR/lower_intrinsics.rs:+5:2: +5:2
}
}

9 changes: 9 additions & 0 deletions tests/mir-opt/lower_intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,12 @@ pub fn read_via_copy_uninhabited(r: &Never) -> Never {
}

pub enum Never {}

// EMIT_MIR lower_intrinsics.option_payload.LowerIntrinsics.diff
#[cfg(not(bootstrap))]
pub fn option_payload(o: &Option<usize>, p: &Option<String>) {
unsafe {
let _x = core::intrinsics::option_payload_ptr(o);
let _y = core::intrinsics::option_payload_ptr(p);
}
}

0 comments on commit 98bc283

Please sign in to comment.