From 7feb7383d23868c1ace7288b33e6029a9562c5e7 Mon Sep 17 00:00:00 2001 From: Roc Yu Date: Sun, 10 Apr 2022 19:17:38 -0400 Subject: [PATCH 01/17] rustdoc: Reduce allocations in a `theme` function --- src/librustdoc/theme.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/librustdoc/theme.rs b/src/librustdoc/theme.rs index 1e9a65e1d2fc4..7c19865b6d78b 100644 --- a/src/librustdoc/theme.rs +++ b/src/librustdoc/theme.rs @@ -173,15 +173,17 @@ fn build_rule(v: &[u8], positions: &[usize]) -> String { .map(|x| ::std::str::from_utf8(&v[x[0]..x[1]]).unwrap_or("")) .collect::() .trim() - .replace('\n', " ") - .replace('/', "") - .replace('\t', " ") - .replace('{', "") - .replace('}', "") + .chars() + .filter_map(|c| match c { + '\n' | '\t' => Some(' '), + '/' | '{' | '}' => None, + c => Some(c), + }) + .collect::() .split(' ') .filter(|s| !s.is_empty()) - .collect::>() - .join(" "), + .intersperse(" ") + .collect::(), ) .unwrap_or_else(|_| String::new()) } From be23ead9a422b1cf271296ebebd7a8d3afd1604a Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Sun, 10 Apr 2022 16:35:37 -0700 Subject: [PATCH 02/17] Fix crate_type attribute to not warn on duplicates --- compiler/rustc_feature/src/builtin_attrs.rs | 2 +- .../ui/lint/unused/unused-attr-duplicate.rs | 3 - .../lint/unused/unused-attr-duplicate.stderr | 101 ++++++++---------- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs index 7c53f839a92e4..e588385cfca03 100644 --- a/compiler/rustc_feature/src/builtin_attrs.rs +++ b/compiler/rustc_feature/src/builtin_attrs.rs @@ -310,7 +310,7 @@ pub const BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ // Crate properties: ungated!(crate_name, CrateLevel, template!(NameValueStr: "name"), FutureWarnFollowing), - ungated!(crate_type, CrateLevel, template!(NameValueStr: "bin|lib|..."), FutureWarnFollowing), + ungated!(crate_type, CrateLevel, 
template!(NameValueStr: "bin|lib|..."), DuplicatesOk), // crate_id is deprecated ungated!(crate_id, CrateLevel, template!(NameValueStr: "ignored"), FutureWarnFollowing), diff --git a/src/test/ui/lint/unused/unused-attr-duplicate.rs b/src/test/ui/lint/unused/unused-attr-duplicate.rs index 074d5a92ad620..692617eacfbf4 100644 --- a/src/test/ui/lint/unused/unused-attr-duplicate.rs +++ b/src/test/ui/lint/unused/unused-attr-duplicate.rs @@ -13,9 +13,6 @@ #![crate_name = "unused_attr_duplicate"] #![crate_name = "unused_attr_duplicate2"] //~ ERROR unused attribute //~^ WARN this was previously accepted -#![crate_type = "bin"] -#![crate_type = "rlib"] //~ ERROR unused attribute -//~^ WARN this was previously accepted #![recursion_limit = "128"] #![recursion_limit = "256"] //~ ERROR unused attribute //~^ WARN this was previously accepted diff --git a/src/test/ui/lint/unused/unused-attr-duplicate.stderr b/src/test/ui/lint/unused/unused-attr-duplicate.stderr index d4305add0aa4f..f592323b550be 100644 --- a/src/test/ui/lint/unused/unused-attr-duplicate.stderr +++ b/src/test/ui/lint/unused/unused-attr-duplicate.stderr @@ -1,5 +1,5 @@ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:36:1 + --> $DIR/unused-attr-duplicate.rs:33:1 | LL | #[no_link] | ^^^^^^^^^^ help: remove this attribute @@ -10,180 +10,180 @@ note: the lint level is defined here LL | #![deny(unused_attributes)] | ^^^^^^^^^^^^^^^^^ note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:35:1 + --> $DIR/unused-attr-duplicate.rs:32:1 | LL | #[no_link] | ^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:40:1 + --> $DIR/unused-attr-duplicate.rs:37:1 | LL | #[macro_use] | ^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:39:1 + --> $DIR/unused-attr-duplicate.rs:36:1 | LL | #[macro_use] | ^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:50:1 + --> $DIR/unused-attr-duplicate.rs:47:1 | LL | 
#[path = "bar.rs"] | ^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:49:1 + --> $DIR/unused-attr-duplicate.rs:46:1 | LL | #[path = "auxiliary/lint_unused_extern_crate.rs"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! error: unused attribute - --> $DIR/unused-attr-duplicate.rs:56:1 + --> $DIR/unused-attr-duplicate.rs:53:1 | LL | #[ignore = "some text"] | ^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:55:1 + --> $DIR/unused-attr-duplicate.rs:52:1 | LL | #[ignore] | ^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:58:1 + --> $DIR/unused-attr-duplicate.rs:55:1 | LL | #[should_panic(expected = "values don't match")] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:57:1 + --> $DIR/unused-attr-duplicate.rs:54:1 | LL | #[should_panic] | ^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! error: unused attribute - --> $DIR/unused-attr-duplicate.rs:63:1 + --> $DIR/unused-attr-duplicate.rs:60:1 | LL | #[must_use = "some message"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:62:1 + --> $DIR/unused-attr-duplicate.rs:59:1 | LL | #[must_use] | ^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
error: unused attribute - --> $DIR/unused-attr-duplicate.rs:69:1 + --> $DIR/unused-attr-duplicate.rs:66:1 | LL | #[non_exhaustive] | ^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:68:1 + --> $DIR/unused-attr-duplicate.rs:65:1 | LL | #[non_exhaustive] | ^^^^^^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:73:1 + --> $DIR/unused-attr-duplicate.rs:70:1 | LL | #[automatically_derived] | ^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:72:1 + --> $DIR/unused-attr-duplicate.rs:69:1 | LL | #[automatically_derived] | ^^^^^^^^^^^^^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:77:1 + --> $DIR/unused-attr-duplicate.rs:74:1 | LL | #[inline(never)] | ^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:76:1 + --> $DIR/unused-attr-duplicate.rs:73:1 | LL | #[inline(always)] | ^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
error: unused attribute - --> $DIR/unused-attr-duplicate.rs:80:1 + --> $DIR/unused-attr-duplicate.rs:77:1 | LL | #[cold] | ^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:79:1 + --> $DIR/unused-attr-duplicate.rs:76:1 | LL | #[cold] | ^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:82:1 + --> $DIR/unused-attr-duplicate.rs:79:1 | LL | #[track_caller] | ^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:81:1 + --> $DIR/unused-attr-duplicate.rs:78:1 | LL | #[track_caller] | ^^^^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:95:1 + --> $DIR/unused-attr-duplicate.rs:92:1 | LL | #[export_name = "exported_symbol_name"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:97:1 + --> $DIR/unused-attr-duplicate.rs:94:1 | LL | #[export_name = "exported_symbol_name2"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
error: unused attribute - --> $DIR/unused-attr-duplicate.rs:101:1 + --> $DIR/unused-attr-duplicate.rs:98:1 | LL | #[no_mangle] | ^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:100:1 + --> $DIR/unused-attr-duplicate.rs:97:1 | LL | #[no_mangle] | ^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:105:1 + --> $DIR/unused-attr-duplicate.rs:102:1 | LL | #[used] | ^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:104:1 + --> $DIR/unused-attr-duplicate.rs:101:1 | LL | #[used] | ^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:89:5 + --> $DIR/unused-attr-duplicate.rs:86:5 | LL | #[link_name = "this_does_not_exist"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:91:5 + --> $DIR/unused-attr-duplicate.rs:88:5 | LL | #[link_name = "rust_dbg_extern_identity_u32"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -205,102 +205,89 @@ LL | #![crate_name = "unused_attr_duplicate"] error: unused attribute --> $DIR/unused-attr-duplicate.rs:17:1 | -LL | #![crate_type = "rlib"] - | ^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:16:1 - | -LL | #![crate_type = "bin"] - | ^^^^^^^^^^^^^^^^^^^^^^ - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:20:1 - | LL | #![recursion_limit = "256"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:19:1 + --> $DIR/unused-attr-duplicate.rs:16:1 | LL | #![recursion_limit = "128"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! error: unused attribute - --> $DIR/unused-attr-duplicate.rs:23:1 + --> $DIR/unused-attr-duplicate.rs:20:1 | LL | #![type_length_limit = "1"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:22:1 + --> $DIR/unused-attr-duplicate.rs:19:1 | LL | #![type_length_limit = "1048576"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
error: unused attribute - --> $DIR/unused-attr-duplicate.rs:26:1 + --> $DIR/unused-attr-duplicate.rs:23:1 | LL | #![no_std] | ^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:25:1 + --> $DIR/unused-attr-duplicate.rs:22:1 | LL | #![no_std] | ^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:28:1 + --> $DIR/unused-attr-duplicate.rs:25:1 | LL | #![no_implicit_prelude] | ^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:27:1 + --> $DIR/unused-attr-duplicate.rs:24:1 | LL | #![no_implicit_prelude] | ^^^^^^^^^^^^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:30:1 + --> $DIR/unused-attr-duplicate.rs:27:1 | LL | #![windows_subsystem = "windows"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:29:1 + --> $DIR/unused-attr-duplicate.rs:26:1 | LL | #![windows_subsystem = "console"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
error: unused attribute - --> $DIR/unused-attr-duplicate.rs:33:1 + --> $DIR/unused-attr-duplicate.rs:30:1 | LL | #![no_builtins] | ^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:32:1 + --> $DIR/unused-attr-duplicate.rs:29:1 | LL | #![no_builtins] | ^^^^^^^^^^^^^^^ error: unused attribute - --> $DIR/unused-attr-duplicate.rs:43:5 + --> $DIR/unused-attr-duplicate.rs:40:5 | LL | #[macro_export] | ^^^^^^^^^^^^^^^ help: remove this attribute | note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:42:5 + --> $DIR/unused-attr-duplicate.rs:39:5 | LL | #[macro_export] | ^^^^^^^^^^^^^^^ -error: aborting due to 24 previous errors +error: aborting due to 23 previous errors From d2b1bb8a9b7a0a73b8da36d12798f4e848ed7171 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Sun, 10 Apr 2022 22:15:31 -0700 Subject: [PATCH 03/17] use find_ancestor_inside to get right span in CastCheck --- compiler/rustc_typeck/src/check/cast.rs | 37 +++++++++++++------------ src/test/ui/cast/cast-macro-lhs.rs | 12 ++++++++ src/test/ui/cast/cast-macro-lhs.stderr | 11 ++++++++ 3 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 src/test/ui/cast/cast-macro-lhs.rs create mode 100644 src/test/ui/cast/cast-macro-lhs.stderr diff --git a/compiler/rustc_typeck/src/check/cast.rs b/compiler/rustc_typeck/src/check/cast.rs index 6091b8fee00b6..88100fcc453c3 100644 --- a/compiler/rustc_typeck/src/check/cast.rs +++ b/compiler/rustc_typeck/src/check/cast.rs @@ -55,6 +55,7 @@ use rustc_trait_selection::traits::error_reporting::report_object_safety_error; pub struct CastCheck<'tcx> { expr: &'tcx hir::Expr<'tcx>, expr_ty: Ty<'tcx>, + expr_span: Span, cast_ty: Ty<'tcx>, cast_span: Span, span: Span, @@ -207,7 +208,8 @@ impl<'a, 'tcx> CastCheck<'tcx> { cast_span: Span, span: Span, ) -> Result, ErrorGuaranteed> { - let check = CastCheck { expr, expr_ty, cast_ty, cast_span, span }; + let expr_span = 
expr.span.find_ancestor_inside(span).unwrap_or(expr.span); + let check = CastCheck { expr, expr_ty, expr_span, cast_ty, cast_span, span }; // For better error messages, check for some obviously unsized // cases now. We do a more thorough check at the end, once @@ -240,15 +242,15 @@ impl<'a, 'tcx> CastCheck<'tcx> { error_span, format!("cannot cast `{}` as `{}`", fcx.ty_to_string(self.expr_ty), cast_ty), ); - if let Ok(snippet) = fcx.sess().source_map().span_to_snippet(self.expr.span) { + if let Ok(snippet) = fcx.sess().source_map().span_to_snippet(self.expr_span) { err.span_suggestion( - self.expr.span, + self.expr_span, "dereference the expression", format!("*{}", snippet), Applicability::MaybeIncorrect, ); } else { - err.span_help(self.expr.span, "dereference the expression with `*`"); + err.span_help(self.expr_span, "dereference the expression with `*`"); } err.emit(); } @@ -315,7 +317,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { struct_span_err!(fcx.tcx.sess, self.span, E0054, "cannot cast as `bool`"); if self.expr_ty.is_numeric() { - match fcx.tcx.sess.source_map().span_to_snippet(self.expr.span) { + match fcx.tcx.sess.source_map().span_to_snippet(self.expr_span) { Ok(snippet) => { err.span_suggestion( self.span, @@ -440,7 +442,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { } if sugg_mutref { err.span_label(self.span, "invalid cast"); - err.span_note(self.expr.span, "this reference is immutable"); + err.span_note(self.expr_span, "this reference is immutable"); err.span_note(self.cast_span, "trying to cast to a mutable reference type"); } else if let Some((sugg, remove_cast)) = sugg { err.span_label(self.span, "invalid cast"); @@ -449,7 +451,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { .tcx .sess .source_map() - .span_to_snippet(self.expr.span) + .span_to_snippet(self.expr_span) .map_or(false, |snip| snip.starts_with('(')); // Very crude check to see whether the expression must be wrapped @@ -458,14 +460,14 @@ impl<'a, 'tcx> CastCheck<'tcx> { let needs_parens = !has_parens && 
matches!(self.expr.kind, hir::ExprKind::Cast(..)); - let mut suggestion = vec![(self.expr.span.shrink_to_lo(), sugg)]; + let mut suggestion = vec![(self.expr_span.shrink_to_lo(), sugg)]; if needs_parens { suggestion[0].1 += "("; - suggestion.push((self.expr.span.shrink_to_hi(), ")".to_string())); + suggestion.push((self.expr_span.shrink_to_hi(), ")".to_string())); } if remove_cast { suggestion.push(( - self.expr.span.shrink_to_hi().to(self.cast_span), + self.expr_span.shrink_to_hi().to(self.cast_span), String::new(), )); } @@ -481,7 +483,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { ) { let mut label = true; // Check `impl From for self.cast_ty {}` for accurate suggestion: - if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr.span) { + if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr_span) { if let Some(from_trait) = fcx.tcx.get_diagnostic_item(sym::From) { let ty = fcx.resolve_vars_if_possible(self.cast_ty); // Erase regions to avoid panic in `prove_value` when calling @@ -550,7 +552,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { if fcx.tcx.sess.is_nightly_build() { err.span_label( - self.expr.span, + self.expr_span, "consider casting this expression to `*const ()`, \ then using `core::ptr::from_raw_parts`", ); @@ -651,7 +653,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { } } _ => { - err.span_help(self.expr.span, "consider using a box or reference as appropriate"); + err.span_help(self.expr_span, "consider using a box or reference as appropriate"); } } err.emit() @@ -685,7 +687,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { #[instrument(skip(fcx), level = "debug")] pub fn check(mut self, fcx: &FnCtxt<'a, 'tcx>) { - self.expr_ty = fcx.structurally_resolved_type(self.expr.span, self.expr_ty); + self.expr_ty = fcx.structurally_resolved_type(self.expr_span, self.expr_ty); self.cast_ty = fcx.structurally_resolved_type(self.cast_span, self.cast_ty); debug!("check_cast({}, {:?} as {:?})", self.expr.hir_id, self.expr_ty, self.cast_ty); @@ -741,7 +743,7 @@ 
impl<'a, 'tcx> CastCheck<'tcx> { ty::FnDef(..) => { // Attempt a coercion to a fn pointer type. let f = fcx.normalize_associated_types_in( - self.expr.span, + self.expr_span, self.expr_ty.fn_sig(fcx.tcx), ); let res = fcx.try_coerce( @@ -997,7 +999,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { )); let msg = "use `.addr()` to obtain the address of a pointer"; - if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr.span) { + if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr_span) { let scalar_cast = match t_c { ty::cast::IntTy::U(ty::UintTy::Usize) => String::new(), _ => format!(" as {}", self.cast_ty), @@ -1027,13 +1029,12 @@ impl<'a, 'tcx> CastCheck<'tcx> { self.expr.hir_id, self.span, |err| { - let mut err = err.build(&format!( "strict provenance disallows casting integer `{}` to pointer `{}`", self.expr_ty, self.cast_ty )); let msg = "use `.with_addr()` to adjust a valid pointer in the same allocation, to this address"; - if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr.span) { + if let Ok(snippet) = fcx.tcx.sess.source_map().span_to_snippet(self.expr_span) { err.span_suggestion( self.span, msg, diff --git a/src/test/ui/cast/cast-macro-lhs.rs b/src/test/ui/cast/cast-macro-lhs.rs new file mode 100644 index 0000000000000..b509b3239bc58 --- /dev/null +++ b/src/test/ui/cast/cast-macro-lhs.rs @@ -0,0 +1,12 @@ +// Test to make sure we suggest "consider casting" on the right span + +macro_rules! 
foo { + () => { 0 } +} + +fn main() { + let x = foo!() as *const [u8]; + //~^ ERROR cannot cast `usize` to a pointer that is wide + //~| NOTE creating a `*const [u8]` requires both an address and a length + //~| NOTE consider casting this expression to `*const ()`, then using `core::ptr::from_raw_parts` +} diff --git a/src/test/ui/cast/cast-macro-lhs.stderr b/src/test/ui/cast/cast-macro-lhs.stderr new file mode 100644 index 0000000000000..db7ce57e15062 --- /dev/null +++ b/src/test/ui/cast/cast-macro-lhs.stderr @@ -0,0 +1,11 @@ +error[E0606]: cannot cast `usize` to a pointer that is wide + --> $DIR/cast-macro-lhs.rs:8:23 + | +LL | let x = foo!() as *const [u8]; + | ------ ^^^^^^^^^^^ creating a `*const [u8]` requires both an address and a length + | | + | consider casting this expression to `*const ()`, then using `core::ptr::from_raw_parts` + +error: aborting due to previous error + +For more information about this error, try `rustc --explain E0606`. From c4b83362c09a2f497b2845fc83c07eb8e0c9175e Mon Sep 17 00:00:00 2001 From: Takayuki Maeda Date: Mon, 11 Apr 2022 22:15:45 +0900 Subject: [PATCH 04/17] fix a bad error message for `relative paths are not supported in visibilities` error --- compiler/rustc_resolve/src/diagnostics.rs | 2 +- src/test/ui/privacy/restricted/relative-2018.rs | 2 +- src/test/ui/privacy/restricted/relative-2018.stderr | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_resolve/src/diagnostics.rs b/compiler/rustc_resolve/src/diagnostics.rs index 609dbd1fe1b4e..de2229f742d22 100644 --- a/compiler/rustc_resolve/src/diagnostics.rs +++ b/compiler/rustc_resolve/src/diagnostics.rs @@ -632,7 +632,7 @@ impl<'a> Resolver<'a> { VisResolutionError::Relative2018(span, path) => { let mut err = self.session.struct_span_err( span, - "relative paths are not supported in visibilities on 2018 edition", + "relative paths are not supported in visibilities in 2018 edition or later", ); err.span_suggestion( path.span, diff --git 
a/src/test/ui/privacy/restricted/relative-2018.rs b/src/test/ui/privacy/restricted/relative-2018.rs index 69b7c1e4d4f3c..954169a9ffb55 100644 --- a/src/test/ui/privacy/restricted/relative-2018.rs +++ b/src/test/ui/privacy/restricted/relative-2018.rs @@ -7,7 +7,7 @@ mod m { pub(in ::core) struct S4; //~^ ERROR visibilities can only be restricted to ancestor modules pub(in a::b) struct S5; - //~^ ERROR relative paths are not supported in visibilities on 2018 edition + //~^ ERROR relative paths are not supported in visibilities in 2018 edition or later } fn main() {} diff --git a/src/test/ui/privacy/restricted/relative-2018.stderr b/src/test/ui/privacy/restricted/relative-2018.stderr index 54fee085ee943..dec0d5157da4f 100644 --- a/src/test/ui/privacy/restricted/relative-2018.stderr +++ b/src/test/ui/privacy/restricted/relative-2018.stderr @@ -4,7 +4,7 @@ error[E0742]: visibilities can only be restricted to ancestor modules LL | pub(in ::core) struct S4; | ^^^^^^ -error: relative paths are not supported in visibilities on 2018 edition +error: relative paths are not supported in visibilities in 2018 edition or later --> $DIR/relative-2018.rs:9:12 | LL | pub(in a::b) struct S5; From 365d4f4fd0caa3e4e1dd95eb42f7244e1424d37e Mon Sep 17 00:00:00 2001 From: Cheng XU Date: Tue, 5 Apr 2022 23:13:46 -0700 Subject: [PATCH 05/17] pre-push.sh: Use python3 if python is not found Since Python 2 has reached EOL, `python` may not be available in certain systems (e.g., recent macOS). We should use `python3` in this case to avoid error like `python: No such file or directory`. 
--- src/etc/pre-push.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/etc/pre-push.sh b/src/etc/pre-push.sh index a78725f2ab0d1..5f5b48bc1c0b4 100755 --- a/src/etc/pre-push.sh +++ b/src/etc/pre-push.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Call `tidy --bless` before each commit +# Call `tidy --bless` before git push # Copy this script to .git/hooks to activate, # and remove it from .git/hooks to deactivate. # @@ -14,6 +14,8 @@ COMMAND="$ROOT_DIR/x.py test tidy --bless" if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then COMMAND="python $COMMAND" +elif ! command -v python &> /dev/null; then + COMMAND="python3 $COMMAND" fi echo "Running pre-push script '$COMMAND'" From dae5c842fca09a4da272ec003285a8867cf906eb Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Thu, 24 Mar 2022 18:25:03 -0400 Subject: [PATCH 06/17] Improve documentation of `Place` and `Operand` --- compiler/rustc_middle/src/mir/mod.rs | 134 ++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 578fcd82ad614..394bc12f015c1 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1785,8 +1785,98 @@ pub struct CopyNonOverlapping<'tcx> { /////////////////////////////////////////////////////////////////////////// // Places -/// A path to a value; something that can be evaluated without -/// changing or disturbing program state. +/// Places roughly correspond to a "location in memory." Places in MIR are the same mathematical +/// object as places in Rust. This of course means that what exactly they are is undecided and part +/// of the Rust memory model. However, they will likely contain at least the following three pieces +/// of information in some form: +/// +/// 1. The part of memory that is referred to (see discussion below for details). +/// 2. The type of the place and an optional variant index. 
See [`PlaceTy`][tcx::PlaceTy] +/// 3. The provenance with which the place is being accessed. +/// +/// We'll give a description below of how the first two of these three properties are computed for a +/// place. We cannot give a description of the provenance, because that is part of the undecided +/// aliasing model - we only include it here at all to acknowledge its existence. +/// +/// For a place that has no projections, ie `Place { local, projection: [] }`, the part of memory is +/// the local's full allocation and the type is the type of the local. For any other place, we +/// define the values as a function of the parent place, that is the place with its last +/// [`ProjectionElem`] stripped. The way this is computed of course depends on the kind of that last +/// projection element: +/// +/// - [`Downcast`](ProjectionElem::Downcast): This projection sets the place's variant index to the +/// given one, and makes no other changes. A `Downcast` projection on a place with its variant +/// index already set is not well-formed. +/// - [`Field`](ProjectionElem::Field): `Field` projections take their parent place and create a +/// place referring to one of the fields of the type. The referred to place in memory is where +/// the layout places the field. The type becomes the type of the field. +/// +/// These projections are only legal for tuples, ADTs, closures, and generators. If the ADT or +/// generator has more than one variant, the parent place's variant index must be set, indicating +/// which variant is being used. If it has just one variant, the variant index may or may not be +/// included - the single possible variant is inferred if it is not included. +/// - [`ConstantIndex`](ProjectionElem::ConstantIndex): Computes an offset in units of `T` into the +/// place as described in the documentation for the `ProjectionElem`. The resulting part of +/// memory is the location of that element of the array/slice, and the type is `T`. 
This is only +/// legal if the parent place has type `[T; N]` or `[T]` (*not* `&[T]`). +/// - [`Subslice`](ProjectionElem::Subslice): Much like `ConstantIndex`. It is also only legal on +/// `[T; N]` and `[T]`. However, this yields a `Place` of type `[T]`, and may refer to more than +/// one element in the parent place. +/// - [`Index`](ProjectionElem::Index): Like `ConstantIndex`, only legal on `[T; N]` or `[T]`. +/// However, `Index` additionally takes a local from which the value of the index is computed at +/// runtime. Computing the value of the index involves interpreting the `Local` as a +/// `Place { local, projection: [] }`, and then computing its value as if done via +/// [`Operand::Copy`]. The array/slice is then indexed with the resulting value. The local must +/// have type `usize`. +/// - [`Deref`](ProjectionElem::Deref): Derefs are the last type of projection, and the most +/// complicated. They are only legal on parent places that are references, pointers, or `Box`. A +/// `Deref` projection begins by creating a value from the parent place, as if by +/// [`Operand::Copy`]. It then dereferences the resulting pointer, creating a place of the +/// pointed to type. +/// +/// **Needs clarification**: What about metadata resulting from dereferencing wide pointers (and +/// possibly from accessing unsized locals - not sure how those work)? That probably deserves to go +/// on the list above and be discussed too. It is also probably necessary for making the indexing +/// stuff less hand-wavy. +/// +/// **Needs clarification**: When it says "part of memory" what does that mean precisely, and how +/// does it interact with the metadata? +/// +/// One possible model that I believe makes sense is that "part of memory" is actually just the +/// address of the beginning of the referred to range of bytes.
For sized types, the size of the +/// range is then stored in the type, and for unsized types it's stored (possibly indirectly, +/// through a vtable) in the metadata. +/// +/// Alternatively, the "part of memory" could be a whole range of bytes. Initially seemed more +/// natural to me, but seems like it falls apart after a little bit. +/// +/// More likely though, we should call this detail a part of the Rust memory model and let that deal +/// with the precise definition of this part of a place. If we feel strongly, I don't think we *have +/// to* though. MIR places are more flexible than Rust places, and we might be able to make a +/// decision on the flexible parts without semi-stabilizing the source language. (end NC) +/// +/// Computing a place may be UB - this is certainly the case with dereferencing, which requires +/// sufficient provenance, but it may additionally be the case for some of the other field +/// projections. +/// +/// It is undecided when this UB kicks in. As best I can tell that is the question being discussed +/// in [UCG#319]. Summarizing from that thread, I believe the options are: +/// +/// [UCG#319]: https://github.com/rust-lang/unsafe-code-guidelines/issues/319 +/// +/// 1. Each intermediate place must have provenance for the whole part of memory it refers to. This +/// is the status quo. +/// 2. Only for intermediate place where the last projection was *not* a deref. This corresponds to +/// "Check inbounds on place projection". +/// 3. Only on place to value conversions, assignments, and referencing operation. This corresponds +/// to "remove the restrictions from `*` entirely." +/// 4. On each intermediate place if the place is used for a place to value conversion as part of +/// an assignment or it is used for a referencing operation. For a raw pointer +/// computation, never. This corresponds to "magic?". +/// +/// Hopefully I am not misrepresenting anyone's opinions - please let me know if I am.
Currently, +/// Rust chooses option 1. This is checked by MIRI and taken advantage of by codegen (via `gep +/// inbounds`). That is possibly subject to change. #[derive(Copy, Clone, PartialEq, Eq, Hash, TyEncodable, HashStable)] pub struct Place<'tcx> { pub local: Local, @@ -2155,24 +2245,42 @@ pub struct SourceScopeLocalData { /////////////////////////////////////////////////////////////////////////// // Operands -/// These are values that can appear inside an rvalue. They are intentionally -/// limited to prevent rvalues from being nested in one another. +/// An operand in MIR represents a "value" in Rust, the definition of which is undecided and part of +/// the memory model. One proposal for a definition of values can be found [on UCG][value-def]. +/// +/// [value-def]: https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/value-domain.md +/// +/// The most common way to create values is via a place to value conversion. A place to value +/// conversion is an operation which reads the memory of the place and converts it to a value. This +/// is a fundamentally *typed* operation. Different types will do different things. These are some +/// possible examples of what Rust may - but will not necessarily - decide to do on place to value +/// conversions: +/// +/// 1. Types with validity constraints cause UB if the validity constraint is not met +/// 2. References/pointers may have their provenance change or cause other provenance related +/// side-effects. +/// +/// A place to value conversion on a place that has its variant index set is not well-formed. +/// However, note that this rule only applies to places appearing in MIR bodies. Many functions, +/// such as [`Place::ty`], still accept such a place. If you write a function for which it might be +/// ambiguous whether such a thing is accepted, make sure to document your choice clearly. 
#[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] pub enum Operand<'tcx> { - /// Copy: The value must be available for use afterwards. - /// - /// This implies that the type of the place must be `Copy`; this is true - /// by construction during build, but also checked by the MIR type checker. + /// Creates a value by performing a place to value conversion at the given place. The type of + /// the place must be `Copy` Copy(Place<'tcx>), - /// Move: The value (including old borrows of it) will not be used again. + /// Creates a value by performing a place to value conversion for the place, just like the + /// `Copy` operand. + /// + /// This *may* additionally overwrite the place with `uninit` bytes, depending on how we decide + /// in [UCG#188]. You should not emit MIR that may attempt a subsequent second place to value + /// conversion on this place without first re-initializing it. /// - /// Safe for values of all types (modulo future developments towards `?Move`). - /// Correct usage patterns are enforced by the borrow checker for safe code. - /// `Copy` may be converted to `Move` to enable "last-use" optimizations. + /// [UCG#188]: https://github.com/rust-lang/unsafe-code-guidelines/issues/188 Move(Place<'tcx>), - /// Synthesizes a constant value. + /// Constants are already semantically values, and remain unchanged. 
Constant(Box>), } From 2f4a7a074299a1450f02365de2d80a8c13f03510 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Thu, 24 Mar 2022 18:25:37 -0400 Subject: [PATCH 07/17] Adjust computation of place types to detect more invalid places --- compiler/rustc_const_eval/src/transform/validate.rs | 10 +++++++++- compiler/rustc_middle/src/mir/tcx.rs | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index 58a7f6d1be0b1..af58bcabdf23d 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -3,8 +3,8 @@ use rustc_index::bit_set::BitSet; use rustc_infer::infer::TyCtxtInferExt; use rustc_middle::mir::interpret::Scalar; -use rustc_middle::mir::traversal; use rustc_middle::mir::visit::{PlaceContext, Visitor}; +use rustc_middle::mir::{traversal, Place}; use rustc_middle::mir::{ AggregateKind, BasicBlock, Body, BorrowKind, Local, Location, MirPass, MirPhase, Operand, PlaceElem, PlaceRef, ProjectionElem, Rvalue, SourceScope, Statement, StatementKind, Terminator, @@ -240,6 +240,14 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { self.super_projection_elem(local, proj_base, elem, context, location); } + fn visit_place(&mut self, place: &Place<'tcx>, _: PlaceContext, location: Location) { + // Set off any `bug!`s in the type computation code + let ty = place.ty(&self.body.local_decls, self.tcx); + if ty.variant_index.is_some() { + self.fail(location, "Top level places may not have their variant index set!"); + } + } + fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) { match &statement.kind { StatementKind::Assign(box (dest, rvalue)) => { diff --git a/compiler/rustc_middle/src/mir/tcx.rs b/compiler/rustc_middle/src/mir/tcx.rs index 51d8113840a93..597ade4223684 100644 --- a/compiler/rustc_middle/src/mir/tcx.rs +++ b/compiler/rustc_middle/src/mir/tcx.rs @@ 
-76,6 +76,9 @@ impl<'tcx> PlaceTy<'tcx> { V: ::std::fmt::Debug, T: ::std::fmt::Debug + Copy, { + if self.variant_index.is_some() && !matches!(elem, ProjectionElem::Field(..)) { + bug!("cannot use non field projection on downcasted place") + } let answer = match *elem { ProjectionElem::Deref => { let ty = self From 634369170a5bc73daa51b2b15763eb508885d9c0 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Thu, 24 Mar 2022 22:29:33 -0400 Subject: [PATCH 08/17] Add documentation for the semantics of MIR rvalues --- compiler/rustc_middle/src/lib.rs | 1 + compiler/rustc_middle/src/mir/mod.rs | 123 ++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_middle/src/lib.rs b/compiler/rustc_middle/src/lib.rs index fa2dad5ce25f0..fd2b5f5335f69 100644 --- a/compiler/rustc_middle/src/lib.rs +++ b/compiler/rustc_middle/src/lib.rs @@ -59,6 +59,7 @@ #![feature(unwrap_infallible)] #![feature(decl_macro)] #![feature(drain_filter)] +#![feature(intra_doc_pointers)] #![recursion_limit = "512"] #![allow(rustc::potential_query_instability)] diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 394bc12f015c1..8c4d289a4c34b 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -2388,57 +2388,134 @@ impl<'tcx> Operand<'tcx> { #[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq)] /// The various kinds of rvalues that can appear in MIR. /// -/// Not all of these are allowed at every [`MirPhase`]. Check the documentation there to see which -/// ones you do not have to worry about. The MIR validator will generally enforce such restrictions, -/// causing an ICE if they are violated. +/// Not all of these are allowed at every [`MirPhase`] - when this is the case, it's stated below. +/// +/// Computing any rvalue begins by evaluating the places and operands in the rvalue in the order in +/// which they appear. 
These are then used to produce a "value" - the same kind of value that an +/// [`Operand`] is. pub enum Rvalue<'tcx> { - /// x (either a move or copy, depending on type of x) + /// Yields the operand unchanged Use(Operand<'tcx>), - /// [x; 32] + /// Creates an array where each element is the value of the operand. This currently does not + /// drop the value even if the number of repetitions is zero, see [#74836]. + /// + /// Corresponds to source code like `[x; 32]`. + /// + /// [#74836]: https://github.com/rust-lang/rust/issues/74836 Repeat(Operand<'tcx>, ty::Const<'tcx>), - /// &x or &mut x + /// Creates a reference of the indicated kind to the place. + /// + /// There is not much to document here, because besides the obvious parts the semantics of this + /// are essentially entirely a part of the aliasing model. There are many UCG issues discussing + /// exactly what the behavior of this operation should be. + /// + /// `Shallow` borrows are disallowed after drop lowering. Ref(Region<'tcx>, BorrowKind, Place<'tcx>), - /// Accessing a thread local static. This is inherently a runtime operation, even if llvm - /// treats it as an access to a static. This `Rvalue` yields a reference to the thread local - /// static. + /// Returns a pointer/reference to the given thread local. + /// + /// The yielded type is a `*mut T` if the static is mutable, otherwise if the static is extern a + /// `*const T`, and if neither of those apply a `&T`. + /// + /// **Note:** This is a runtime operation that actually executes code and is in this sense more + /// like a function call. Also, DSEing these causes `fn main() {}` to SIGILL for some reason + /// that I never got a chance to look into. + /// + /// **Needs clarification**: Are there weird additional semantics here related to the runtime + /// nature of this operation? 
ThreadLocalRef(DefId), - /// Create a raw pointer to the given place - /// Can be generated by raw address of expressions (`&raw const x`), - /// or when casting a reference to a raw pointer. + /// Creates a pointer with the indicated mutability to the place. + /// + /// This is generated by pointer casts like `&v as *const _` or raw address of expressions like + /// `&raw v` or `addr_of!(v)`. + /// + /// Like with references, the semantics of this operation are heavily dependent on the aliasing + /// model. AddressOf(Mutability, Place<'tcx>), - /// length of a `[X]` or `[X;n]` value + /// Yields the length of the place, as a `usize`. + /// + /// If the type of the place is an array, this is the array length. This also works for slices + /// (`[T]`, not `&[T]`) through some mechanism that depends on how exactly places work (see + /// there for more details). Len(Place<'tcx>), + /// Performs essentially all of the casts that can be performed via `as`. + /// + /// This allows for casts from/to a variety of types. + /// + /// **FIXME**: Document exactly which `CastKind`s allow which types of casts. Figure out why + /// `ArrayToPointer` and `MutToConstPointer` are special. Cast(CastKind, Operand<'tcx>, Ty<'tcx>), + /// * `Offset` has the same semantics as [`offset`](pointer::offset), except that the second + /// parameter may be a `usize` as well. + /// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats, + /// raw pointers, or function pointers and return a `bool`. + /// * Left and right shift operations accept signed or unsigned integers not necessarily of the + /// same type and return a value of the same type as their LHS. For all other operations, the + /// types of the operands must match. + /// * The `Bit*` operations accept signed integers, unsigned integers, or bools and return a + /// value of that type.
+ /// * The remaining operations accept signed integers, unsigned integers, or floats of any + /// matching type and return a value of that type. BinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>), + + /// Same as `BinaryOp`, but yields `(T, bool)` instead of `T`. In addition to performing the + /// same computation as the matching `BinaryOp`, checks if the infinite precision result would be + /// unequal to the actual result and sets the `bool` if this is the case. `BinOp::Offset` is not + /// allowed here. + /// + /// **FIXME**: What about division/modulo? Are they allowed here at all? Are zero divisors still + /// UB? Also, which other combinations of types are disallowed? CheckedBinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>), + /// Yields the size or alignment of the type as a `usize`. NullaryOp(NullOp, Ty<'tcx>), + + /// Exactly like `BinaryOp`, but with fewer operands. + /// + /// Also does two's-complement arithmetic. Negation requires a signed integer or a float; binary + /// not requires a signed integer, unsigned integer, or bool. Both operation kinds return a + /// value with the same type as their operand. UnaryOp(UnOp, Operand<'tcx>), - /// Read the discriminant of an ADT. + /// Computes the discriminant of the place, returning it as an integer of type + /// [`discriminant_ty`]. + /// + /// The validity requirements for the underlying value are undecided for this rvalue, see + /// [#91095]. Note too that the value of the discriminant is not the same thing as the + /// variant index; use [`discriminant_for_variant`] to convert. + /// + /// For types defined in the source code as enums, this is well behaved. This is also well + /// formed for other types, but yields no particular value - there is no reason it couldn't be + /// defined to yield eg zero though. /// - /// Undefined (i.e., no effort is made to make it defined, but there’s no reason why it cannot - /// be defined to return, say, a 0) if ADT is not an enum.
+ /// [`discriminant_ty`]: crate::ty::Ty::discriminant_ty + /// [#91095]: https://github.com/rust-lang/rust/issues/91095 + /// [`discriminant_for_variant`]: crate::ty::Ty::discriminant_for_variant Discriminant(Place<'tcx>), - /// Creates an aggregate value, like a tuple or struct. This is - /// only needed because we want to distinguish `dest = Foo { x: - /// ..., y: ... }` from `dest.x = ...; dest.y = ...;` in the case - /// that `Foo` has a destructor. These rvalues can be optimized - /// away after type-checking and before lowering. + /// Creates an aggregate value, like a tuple or struct. + /// + /// This is needed because dataflow analysis needs to distinguish + /// `dest = Foo { x: ..., y: ... }` from `dest.x = ...; dest.y = ...;` in the case that `Foo` + /// has a destructor. + /// + /// Disallowed after deaggregation for all aggregate kinds except `Array` and `Generator`. After + /// generator lowering, `Generator` aggregate kinds are disallowed too. Aggregate(Box>, Vec>), /// Transmutes a `*mut u8` into shallow-initialized `Box`. /// - /// This is different a normal transmute because dataflow analysis will treat the box - /// as initialized but its content as uninitialized. + /// This is different from a normal transmute because dataflow analysis will treat the box as + /// initialized but its content as uninitialized. Like other pointer casts, this in general + /// affects alias analysis. + /// + /// Disallowed after drop elaboration. ShallowInitBox(Operand<'tcx>, Ty<'tcx>), } From 9ac5e986ed8e7d589787532857ef74576473adcf Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Thu, 24 Mar 2022 22:30:23 -0400 Subject: [PATCH 09/17] Extend the MIR validator to check many more things around rvalues.
--- .../src/transform/validate.rs | 200 ++++++++++++++---- src/test/mir-opt/lower_intrinsics.rs | 2 +- ...r_intrinsics.wrapping.LowerIntrinsics.diff | 40 ++-- 3 files changed, 182 insertions(+), 60 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index af58bcabdf23d..7eb91385653e5 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -4,14 +4,13 @@ use rustc_index::bit_set::BitSet; use rustc_infer::infer::TyCtxtInferExt; use rustc_middle::mir::interpret::Scalar; use rustc_middle::mir::visit::{PlaceContext, Visitor}; -use rustc_middle::mir::{traversal, Place}; use rustc_middle::mir::{ - AggregateKind, BasicBlock, Body, BorrowKind, Local, Location, MirPass, MirPhase, Operand, - PlaceElem, PlaceRef, ProjectionElem, Rvalue, SourceScope, Statement, StatementKind, Terminator, - TerminatorKind, START_BLOCK, + traversal, AggregateKind, BasicBlock, BinOp, Body, BorrowKind, Local, Location, MirPass, + MirPhase, Operand, Place, PlaceElem, PlaceRef, ProjectionElem, Rvalue, SourceScope, Statement, + StatementKind, Terminator, TerminatorKind, UnOp, START_BLOCK, }; use rustc_middle::ty::fold::BottomUpFolder; -use rustc_middle::ty::{self, ParamEnv, Ty, TyCtxt, TypeFoldable}; +use rustc_middle::ty::{self, InstanceDef, ParamEnv, Ty, TyCtxt, TypeFoldable}; use rustc_mir_dataflow::impls::MaybeStorageLive; use rustc_mir_dataflow::storage::AlwaysLiveLocals; use rustc_mir_dataflow::{Analysis, ResultsCursor}; @@ -36,6 +35,13 @@ pub struct Validator { impl<'tcx> MirPass<'tcx> for Validator { fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { + // FIXME(JakobDegen): These bodies never instantiated in codegend anyway, so it's not + // terribly important that they pass the validator. However, I think other passes might + // still see them, in which case they might be surprised. 
It would probably be better if we + // didn't put this through the MIR pipeline at all. + if matches!(body.source.instance, InstanceDef::Intrinsic(..) | InstanceDef::Virtual(..)) { + return; + } let def_id = body.source.def_id(); let param_env = tcx.param_env(def_id); let mir_phase = self.mir_phase; @@ -248,58 +254,174 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { } } - fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) { - match &statement.kind { - StatementKind::Assign(box (dest, rvalue)) => { - // LHS and RHS of the assignment must have the same type. - let left_ty = dest.ty(&self.body.local_decls, self.tcx).ty; - let right_ty = rvalue.ty(&self.body.local_decls, self.tcx); - if !self.mir_assign_valid_types(right_ty, left_ty) { + fn visit_rvalue(&mut self, rvalue: &Rvalue<'tcx>, location: Location) { + macro_rules! check_kinds { + ($t:expr, $text:literal, $($patterns:tt)*) => { + if !matches!(($t).kind(), $($patterns)*) { + self.fail(location, format!($text, $t)); + } + }; + } + match rvalue { + Rvalue::Use(_) => {} + Rvalue::Aggregate(agg_kind, _) => { + let disallowed = match **agg_kind { + AggregateKind::Array(..) => false, + AggregateKind::Generator(..) => self.mir_phase >= MirPhase::GeneratorsLowered, + _ => self.mir_phase >= MirPhase::Deaggregated, + }; + if disallowed { self.fail( location, - format!( - "encountered `{:?}` with incompatible types:\n\ - left-hand side has type: {}\n\ - right-hand side has type: {}", - statement.kind, left_ty, right_ty, - ), + format!("{:?} have been lowered to field assignments", rvalue), + ) + } + } + Rvalue::Ref(_, BorrowKind::Shallow, _) => { + if self.mir_phase >= MirPhase::DropsLowered { + self.fail( + location, + "`Assign` statement with a `Shallow` borrow should have been removed after drop lowering phase", ); } - match rvalue { - // The sides of an assignment must not alias. Currently this just checks whether the places - // are identical. 
- Rvalue::Use(Operand::Copy(src) | Operand::Move(src)) => { - if dest == src { + } + Rvalue::Len(p) => { + let pty = p.ty(&self.body.local_decls, self.tcx).ty; + check_kinds!( + pty, + "Cannot compute length of non-array type {:?}", + ty::Array(..) | ty::Slice(..) + ); + } + Rvalue::BinaryOp(op, vals) | Rvalue::CheckedBinaryOp(op, vals) => { + use BinOp::*; + let a = vals.0.ty(&self.body.local_decls, self.tcx); + let b = vals.1.ty(&self.body.local_decls, self.tcx); + match op { + Offset => { + check_kinds!(a, "Cannot offset non-pointer type {:?}", ty::RawPtr(..)); + if b != self.tcx.types.isize && b != self.tcx.types.usize { + self.fail(location, format!("Cannot offset by non-isize type {:?}", b)); + } + } + Eq | Lt | Le | Ne | Ge | Gt => { + for x in [a, b] { + check_kinds!( + x, + "Cannot compare type {:?}", + ty::Bool + | ty::Char + | ty::Int(..) + | ty::Uint(..) + | ty::Float(..) + | ty::RawPtr(..) + | ty::FnPtr(..) + ) + } + // None of the possible types have lifetimes, so we can just compare + // directly + if a != b { self.fail( location, - "encountered `Assign` statement with overlapping memory", + format!("Cannot compare unequal types {:?} and {:?}", a, b), ); } } - Rvalue::Aggregate(agg_kind, _) => { - let disallowed = match **agg_kind { - AggregateKind::Array(..) => false, - AggregateKind::Generator(..) => { - self.mir_phase >= MirPhase::GeneratorsLowered - } - _ => self.mir_phase >= MirPhase::Deaggregated, - }; - if disallowed { + Shl | Shr => { + for x in [a, b] { + check_kinds!( + x, + "Cannot shift non-integer type {:?}", + ty::Uint(..) | ty::Int(..) + ) + } + } + BitAnd | BitOr | BitXor => { + for x in [a, b] { + check_kinds!( + x, + "Cannot perform bitwise op on type {:?}", + ty::Uint(..) | ty::Int(..) 
| ty::Bool + ) + } + if a != b { self.fail( location, - format!("{:?} have been lowered to field assignments", rvalue), - ) + format!( + "Cannot perform bitwise op on unequal types {:?} and {:?}", + a, b + ), + ); } } - Rvalue::Ref(_, BorrowKind::Shallow, _) => { - if self.mir_phase >= MirPhase::DropsLowered { + Add | Sub | Mul | Div | Rem => { + for x in [a, b] { + check_kinds!( + x, + "Cannot perform op on type {:?}", + ty::Uint(..) | ty::Int(..) | ty::Float(..) + ) + } + if a != b { self.fail( location, - "`Assign` statement with a `Shallow` borrow should have been removed after drop lowering phase", + format!("Cannot perform op on unequal types {:?} and {:?}", a, b), ); } } - _ => {} + } + } + Rvalue::UnaryOp(op, operand) => { + let a = operand.ty(&self.body.local_decls, self.tcx); + match op { + UnOp::Neg => { + check_kinds!(a, "Cannot negate type {:?}", ty::Int(..) | ty::Float(..)) + } + UnOp::Not => { + check_kinds!( + a, + "Cannot binary not type {:?}", + ty::Int(..) | ty::Uint(..) | ty::Bool + ); + } + } + } + Rvalue::ShallowInitBox(operand, _) => { + let a = operand.ty(&self.body.local_decls, self.tcx); + check_kinds!(a, "Cannot shallow init type {:?}", ty::RawPtr(..)); + } + _ => {} + } + self.super_rvalue(rvalue, location); + } + + fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) { + match &statement.kind { + StatementKind::Assign(box (dest, rvalue)) => { + // LHS and RHS of the assignment must have the same type. + let left_ty = dest.ty(&self.body.local_decls, self.tcx).ty; + let right_ty = rvalue.ty(&self.body.local_decls, self.tcx); + if !self.mir_assign_valid_types(right_ty, left_ty) { + self.fail( + location, + format!( + "encountered `{:?}` with incompatible types:\n\ + left-hand side has type: {}\n\ + right-hand side has type: {}", + statement.kind, left_ty, right_ty, + ), + ); + } + // FIXME(JakobDegen): Check this for all rvalues, not just this one. 
+ if let Rvalue::Use(Operand::Copy(src) | Operand::Move(src)) = rvalue { + // The sides of an assignment must not alias. Currently this just checks whether + // the places are identical. + if dest == src { + self.fail( + location, + "encountered `Assign` statement with overlapping memory", + ); + } } } StatementKind::AscribeUserType(..) => { diff --git a/src/test/mir-opt/lower_intrinsics.rs b/src/test/mir-opt/lower_intrinsics.rs index 8a8880dad02e5..eab51b65f1a19 100644 --- a/src/test/mir-opt/lower_intrinsics.rs +++ b/src/test/mir-opt/lower_intrinsics.rs @@ -3,7 +3,7 @@ #![crate_type = "lib"] // EMIT_MIR lower_intrinsics.wrapping.LowerIntrinsics.diff -pub fn wrapping(a: T, b: T) { +pub fn wrapping(a: i32, b: i32) { let _x = core::intrinsics::wrapping_add(a, b); let _y = core::intrinsics::wrapping_sub(a, b); let _z = core::intrinsics::wrapping_mul(a, b); diff --git a/src/test/mir-opt/lower_intrinsics.wrapping.LowerIntrinsics.diff b/src/test/mir-opt/lower_intrinsics.wrapping.LowerIntrinsics.diff index a531a19bd7820..5a0286bad2fb7 100644 --- a/src/test/mir-opt/lower_intrinsics.wrapping.LowerIntrinsics.diff +++ b/src/test/mir-opt/lower_intrinsics.wrapping.LowerIntrinsics.diff @@ -1,23 +1,23 @@ - // MIR for `wrapping` before LowerIntrinsics + // MIR for `wrapping` after LowerIntrinsics - fn wrapping(_1: T, _2: T) -> () { - debug a => _1; // in scope 0 at $DIR/lower_intrinsics.rs:6:26: 6:27 - debug b => _2; // in scope 0 at $DIR/lower_intrinsics.rs:6:32: 6:33 - let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:6:38: 6:38 - let _3: T; // in scope 0 at $DIR/lower_intrinsics.rs:7:9: 7:11 - let mut _4: T; // in scope 0 at $DIR/lower_intrinsics.rs:7:45: 7:46 - let mut _5: T; // in scope 0 at $DIR/lower_intrinsics.rs:7:48: 7:49 - let mut _7: T; // in scope 0 at $DIR/lower_intrinsics.rs:8:45: 8:46 - let mut _8: T; // in scope 0 at $DIR/lower_intrinsics.rs:8:48: 8:49 - let mut _10: T; // in scope 0 at $DIR/lower_intrinsics.rs:9:45: 9:46 - let mut _11: T; // 
in scope 0 at $DIR/lower_intrinsics.rs:9:48: 9:49 + fn wrapping(_1: i32, _2: i32) -> () { + debug a => _1; // in scope 0 at $DIR/lower_intrinsics.rs:6:17: 6:18 + debug b => _2; // in scope 0 at $DIR/lower_intrinsics.rs:6:25: 6:26 + let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:6:33: 6:33 + let _3: i32; // in scope 0 at $DIR/lower_intrinsics.rs:7:9: 7:11 + let mut _4: i32; // in scope 0 at $DIR/lower_intrinsics.rs:7:45: 7:46 + let mut _5: i32; // in scope 0 at $DIR/lower_intrinsics.rs:7:48: 7:49 + let mut _7: i32; // in scope 0 at $DIR/lower_intrinsics.rs:8:45: 8:46 + let mut _8: i32; // in scope 0 at $DIR/lower_intrinsics.rs:8:48: 8:49 + let mut _10: i32; // in scope 0 at $DIR/lower_intrinsics.rs:9:45: 9:46 + let mut _11: i32; // in scope 0 at $DIR/lower_intrinsics.rs:9:48: 9:49 scope 1 { debug _x => _3; // in scope 1 at $DIR/lower_intrinsics.rs:7:9: 7:11 - let _6: T; // in scope 1 at $DIR/lower_intrinsics.rs:8:9: 8:11 + let _6: i32; // in scope 1 at $DIR/lower_intrinsics.rs:8:9: 8:11 scope 2 { debug _y => _6; // in scope 2 at $DIR/lower_intrinsics.rs:8:9: 8:11 - let _9: T; // in scope 2 at $DIR/lower_intrinsics.rs:9:9: 9:11 + let _9: i32; // in scope 2 at $DIR/lower_intrinsics.rs:9:9: 9:11 scope 3 { debug _z => _9; // in scope 3 at $DIR/lower_intrinsics.rs:9:9: 9:11 } @@ -30,10 +30,10 @@ _4 = _1; // scope 0 at $DIR/lower_intrinsics.rs:7:45: 7:46 StorageLive(_5); // scope 0 at $DIR/lower_intrinsics.rs:7:48: 7:49 _5 = _2; // scope 0 at $DIR/lower_intrinsics.rs:7:48: 7:49 -- _3 = wrapping_add::(move _4, move _5) -> bb1; // scope 0 at $DIR/lower_intrinsics.rs:7:14: 7:50 +- _3 = wrapping_add::(move _4, move _5) -> bb1; // scope 0 at $DIR/lower_intrinsics.rs:7:14: 7:50 - // mir::Constant - // + span: $DIR/lower_intrinsics.rs:7:14: 7:44 -- // + literal: Const { ty: extern "rust-intrinsic" fn(T, T) -> T {wrapping_add::}, val: Value(Scalar()) } +- // + literal: Const { ty: extern "rust-intrinsic" fn(i32, i32) -> i32 {wrapping_add::}, val: 
Value(Scalar()) } + _3 = Add(move _4, move _5); // scope 0 at $DIR/lower_intrinsics.rs:7:14: 7:50 + goto -> bb1; // scope 0 at $DIR/lower_intrinsics.rs:7:14: 7:50 } @@ -46,10 +46,10 @@ _7 = _1; // scope 1 at $DIR/lower_intrinsics.rs:8:45: 8:46 StorageLive(_8); // scope 1 at $DIR/lower_intrinsics.rs:8:48: 8:49 _8 = _2; // scope 1 at $DIR/lower_intrinsics.rs:8:48: 8:49 -- _6 = wrapping_sub::(move _7, move _8) -> bb2; // scope 1 at $DIR/lower_intrinsics.rs:8:14: 8:50 +- _6 = wrapping_sub::(move _7, move _8) -> bb2; // scope 1 at $DIR/lower_intrinsics.rs:8:14: 8:50 - // mir::Constant - // + span: $DIR/lower_intrinsics.rs:8:14: 8:44 -- // + literal: Const { ty: extern "rust-intrinsic" fn(T, T) -> T {wrapping_sub::}, val: Value(Scalar()) } +- // + literal: Const { ty: extern "rust-intrinsic" fn(i32, i32) -> i32 {wrapping_sub::}, val: Value(Scalar()) } + _6 = Sub(move _7, move _8); // scope 1 at $DIR/lower_intrinsics.rs:8:14: 8:50 + goto -> bb2; // scope 1 at $DIR/lower_intrinsics.rs:8:14: 8:50 } @@ -62,10 +62,10 @@ _10 = _1; // scope 2 at $DIR/lower_intrinsics.rs:9:45: 9:46 StorageLive(_11); // scope 2 at $DIR/lower_intrinsics.rs:9:48: 9:49 _11 = _2; // scope 2 at $DIR/lower_intrinsics.rs:9:48: 9:49 -- _9 = wrapping_mul::(move _10, move _11) -> bb3; // scope 2 at $DIR/lower_intrinsics.rs:9:14: 9:50 +- _9 = wrapping_mul::(move _10, move _11) -> bb3; // scope 2 at $DIR/lower_intrinsics.rs:9:14: 9:50 - // mir::Constant - // + span: $DIR/lower_intrinsics.rs:9:14: 9:44 -- // + literal: Const { ty: extern "rust-intrinsic" fn(T, T) -> T {wrapping_mul::}, val: Value(Scalar()) } +- // + literal: Const { ty: extern "rust-intrinsic" fn(i32, i32) -> i32 {wrapping_mul::}, val: Value(Scalar()) } + _9 = Mul(move _10, move _11); // scope 2 at $DIR/lower_intrinsics.rs:9:14: 9:50 + goto -> bb3; // scope 2 at $DIR/lower_intrinsics.rs:9:14: 9:50 } @@ -73,7 +73,7 @@ bb3: { StorageDead(_11); // scope 2 at $DIR/lower_intrinsics.rs:9:49: 9:50 StorageDead(_10); // scope 2 at 
$DIR/lower_intrinsics.rs:9:49: 9:50 - _0 = const (); // scope 0 at $DIR/lower_intrinsics.rs:6:38: 10:2 + _0 = const (); // scope 0 at $DIR/lower_intrinsics.rs:6:33: 10:2 StorageDead(_9); // scope 2 at $DIR/lower_intrinsics.rs:10:1: 10:2 StorageDead(_6); // scope 1 at $DIR/lower_intrinsics.rs:10:1: 10:2 StorageDead(_3); // scope 0 at $DIR/lower_intrinsics.rs:10:1: 10:2 From 8e01cd612787f2aab5ee7e4650b13941fc0b1707 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 25 Mar 2022 02:17:05 -0400 Subject: [PATCH 10/17] Improve documentation for MIR statement kinds. --- compiler/rustc_middle/src/mir/mod.rs | 92 +++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 8c4d289a4c34b..a01261c543ba6 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1573,18 +1573,44 @@ impl Statement<'_> { /// causing an ICE if they are violated. #[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable)] pub enum StatementKind<'tcx> { - /// Write the RHS Rvalue to the LHS Place. + /// Assign statements roughly correspond to an assignment in Rust proper (`x = ...`) except + /// without the possibility of dropping the previous value (that must be done separately, if at + /// all). The *exact* way this works is undecided. It probably does something like evaluating + /// the LHS and RHS, and then doing the inverse of a place to value conversion to write the + /// resulting value into memory. Various parts of this may do type specific things that are more + /// complicated than simply copying over the bytes depending on the types. /// - /// The LHS place may not overlap with any memory accessed on the RHS. + /// **Needs clarification**: The implication of the above idea would be that assignment implies + /// that the resulting value is initialized. 
I believe we could commit to this separately from + /// committing to whatever part of the memory model we would need to decide on to make the above + /// paragraph precise. Do we want to? + /// + /// Assignments in which the types of the place and rvalue differ are not well-formed. + /// + /// **Needs clarification**: Do we ever want to worry about non-free (in the body) lifetimes for + /// the typing requirement in post drop-elaboration MIR? I think probably not - I'm not sure we + /// could meaningfully require this anyway. How about free lifetimes? Is ignoring this + /// interesting for optimizations? Do we want to allow such optimizations? + /// + /// **Needs clarification**: We currently require that the LHS place not overlap with any place + /// read as part of computation of the RHS. This requirement is under discussion in [#68364]. As + /// a part of this discussion, it is also unclear in what order the components are evaluated. + /// + /// [#68364]: https://github.com/rust-lang/rust/issues/68364 + /// + /// See [`Rvalue`] documentation for details on each of those. Assign(Box<(Place<'tcx>, Rvalue<'tcx>)>), - /// This represents all the reading that a pattern match may do - /// (e.g., inspecting constants and discriminant values), and the - /// kind of pattern it comes from. This is in order to adapt potential - /// error messages to these specific patterns. + /// This represents all the reading that a pattern match may do (e.g., inspecting constants and + /// discriminant values), and the kind of pattern it comes from. This is in order to adapt + /// potential error messages to these specific patterns. /// /// Note that this also is emitted for regular `let` bindings to ensure that locals that are /// never accessed still get some sanity checks for, e.g., `let x: ! = ..;` + /// + /// When executed at runtime this is a nop. + /// + /// Disallowed after drop elaboration.
FakeRead(Box<(FakeReadCause, Place<'tcx>)>), /// Write the discriminant for a variant to the enum Place. @@ -1599,17 +1625,36 @@ pub enum StatementKind<'tcx> { /// This writes `uninit` bytes to the entire place. Deinit(Box>), - /// Start a live range for the storage of the local. + /// `StorageLive` and `StorageDead` statements mark the live range of a local. + /// + /// Using a local before a `StorageLive` or after a `StorageDead` is not well-formed. These + /// statements are not required. If the entire MIR body contains no `StorageLive`/`StorageDead` + /// statements for a particular local, the local is always considered live. + /// + /// More precisely, the MIR validator currently does a `MaybeLiveLocals` analysis to check + /// validity of each use of a local. I believe this is equivalent to requiring for every use of + /// a local, there exist at least one path from the root to that use that contains a + /// `StorageLive` more recently than a `StorageDead`. + /// + /// **Needs clarification**: Is it permitted to `StorageLive` a local for which we previously + /// executed `StorageDead`? How about two `StorageLive`s without an intervening `StorageDead`? + /// Two `StorageDead`s without an intervening `StorageLive`? LLVM says yes, poison, yes. If the + /// answer to any of these is "no," is breaking that rule UB or is it an error to have a path in + /// the CFG that might do this? StorageLive(Local), - /// End the current live range for the storage of the local. + /// See `StorageLive` above. StorageDead(Local), - /// Retag references in the given place, ensuring they got fresh tags. This is - /// part of the Stacked Borrows model. These statements are currently only interpreted - /// by miri and only generated when "-Z mir-emit-retag" is passed. - /// See - /// for more details. + /// Retag references in the given place, ensuring they got fresh tags. + /// + /// This is part of the Stacked Borrows model. 
These statements are currently only interpreted + /// by miri and only generated when `-Z mir-emit-retag` is passed. See + /// for + /// more details. + /// + /// For code that is not specific to stacked borrows, you should consider statements to read + /// and modify the place in an opaque way. Retag(RetagKind, Box>), /// Encodes a user's type ascription. These need to be preserved @@ -1624,6 +1669,10 @@ pub enum StatementKind<'tcx> { /// - `Contravariant` -- requires that `T_y :> T` /// - `Invariant` -- requires that `T_y == T` /// - `Bivariant` -- no effect + /// + /// When executed at runtime this is a nop. + /// + /// Disallowed after drop elaboration. AscribeUserType(Box<(Place<'tcx>, UserTypeProjection)>, ty::Variance), /// Marks the start of a "coverage region", injected with '-Cinstrument-coverage'. A @@ -1633,9 +1682,20 @@ pub enum StatementKind<'tcx> { /// executed. Coverage(Box), - /// Denotes a call to the intrinsic function copy_overlapping, where `src_dst` denotes the - /// memory being read from and written to(one field to save memory), and size - /// indicates how many bytes are being copied over. + /// Denotes a call to the intrinsic function `copy_overlapping`. + /// + /// First, all three operands are evaluated. `src` and `dest` must each be a reference, pointer, + /// or `Box` pointing to the same type `T`. `count` must evaluate to a `usize`. Then, `src` and + /// `dest` are dereferenced, and `count * size_of::<T>()` bytes beginning with the first byte of + /// the `src` place are copied to the contiguous range of bytes beginning with the first byte + /// of `dest`. + /// + /// **Needs clarification**: In what order are operands computed and dereferenced? It should + /// probably match the order for assignment, but that is also undecided. + /// + /// **Needs clarification**: Is this typed or not, ie is there a place to value and back + /// conversion involved? I vaguely remember Ralf saying somewhere that he thought it should not + /// be.
CopyNonOverlapping(Box>), /// No-op. Useful for deleting instructions without affecting statement indices. From f1f25c0f8145c45078ff628dedae6aa4b28d962d Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 25 Mar 2022 20:00:16 -0400 Subject: [PATCH 11/17] Improve documentation for MIR terminators --- compiler/rustc_middle/src/mir/terminator.rs | 148 ++++++++++++++++---- 1 file changed, 121 insertions(+), 27 deletions(-) diff --git a/compiler/rustc_middle/src/mir/terminator.rs b/compiler/rustc_middle/src/mir/terminator.rs index ae94bd121f953..bf68835235da6 100644 --- a/compiler/rustc_middle/src/mir/terminator.rs +++ b/compiler/rustc_middle/src/mir/terminator.rs @@ -105,13 +105,34 @@ impl<'a> Iterator for SwitchTargetsIter<'a> { impl<'a> ExactSizeIterator for SwitchTargetsIter<'a> {} +/// A note on unwinding: Panics may occur during the execution of some terminators. Depending on the +/// `-C panic` flag, this may either cause the program to abort or the call stack to unwind. Such +/// terminators have a `cleanup: Option<BasicBlock>` field on them. If stack unwinding occurs, then +/// once the current function is reached, execution continues at the given basic block, if any. If +/// `cleanup` is `None` then no cleanup is performed, and the stack continues unwinding. This is +/// equivalent to the execution of a `Resume` terminator. +/// +/// The basic block pointed to by a `cleanup` field must have its `cleanup` flag set. `cleanup` +/// basic blocks have a couple restrictions: +/// 1. All `cleanup` fields in them must be `None`. +/// 2. `Return` terminators are not allowed in them. `Abort` and `Resume` terminators are. +/// 3. All other basic blocks (in the current body) that are reachable from `cleanup` basic blocks +/// must also be `cleanup`. This is a part of the type system and checked statically, so it is +/// still an error to have such an edge in the CFG even if it's known that it won't be taken at +/// runtime.
#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq)] pub enum TerminatorKind<'tcx> { - /// Block should have one successor in the graph; we jump there. + /// Block has one successor; we continue execution there. Goto { target: BasicBlock }, - /// Operand evaluates to an integer; jump depending on its value - /// to one of the targets, and otherwise fallback to `otherwise`. + /// Switches based on the computed value. + /// + /// First, evaluates the `discr` operand. The type of the operand must be a signed or unsigned + /// integer, char, or bool, and must match the given type. Then, if the list of switch targets + /// contains the computed value, continues execution at the associated basic block. Otherwise, + /// continues execution at the "otherwise" basic block. + /// + /// Target values may not appear more than once. SwitchInt { /// The discriminant value being tested. discr: Operand<'tcx>, @@ -124,29 +145,62 @@ pub enum TerminatorKind<'tcx> { targets: SwitchTargets, }, - /// Indicates that the landing pad is finished and unwinding should - /// continue. Emitted by `build::scope::diverge_cleanup`. + /// Indicates that the landing pad is finished and that the process should continue unwinding. + /// + /// Like a return, this marks the end of this invocation of the function. + /// + /// Only permitted in cleanup blocks. `Resume` is not permitted with `-C panic=abort` after + /// deaggregation runs. Resume, - /// Indicates that the landing pad is finished and that the process - /// should abort. Used to prevent unwinding for foreign items. + /// Indicates that the landing pad is finished and that the process should abort. + /// + /// Used to prevent unwinding for foreign items or with `-C panic=abort`. Only permitted in + /// cleanup blocks. Abort, - /// Indicates a normal return. The return place should have - /// been filled in before this executes. This can occur multiple times - /// in different basic blocks.
+ /// Returns from the function. + /// + /// Like function calls, the exact semantics of returns in Rust are unclear. Returning very + /// likely at least assigns the value currently in the return place (`_0`) to the place + /// specified in the associated `Call` terminator in the calling function, as if assigned via + /// `dest = move _0`. It might additionally do other things, like have side-effects in the + /// aliasing model. + /// + /// If the body is a generator body, this has slightly different semantics; it instead causes a + /// `GeneratorState::Returned(_0)` to be created (as if by an `Aggregate` rvalue) and assigned + /// to the return place. Return, /// Indicates a terminator that can never be reached. + /// + /// Executing this terminator is UB. Unreachable, - /// Drop the `Place`. + /// The behavior of this statement differs significantly before and after drop elaboration. + /// After drop elaboration, `Drop` executes the drop glue for the specified place, after which + /// it continues execution/unwinds at the given basic blocks. It is possible that executing drop + /// glue is special - this would be part of Rust's memory model. (**FIXME**: do we have an + /// issue tracking if drop glue has any interesting semantics in addition to those of a function + /// call?) + /// + /// `Drop` before drop elaboration is a *conditional* execution of the drop glue. Specifically, the + /// `Drop` will be executed if... + /// + /// **Needs clarification**: End of that sentence. This in effect should document the exact + /// behavior of drop elaboration. The following sounds vaguely right, but I'm not quite sure: + /// + /// > The drop glue is executed if, among all statements executed within this `Body`, an assignment to + /// > the place or one of its "parents" occurred more recently than a move out of it. This does not + /// > consider indirect assignments.
Drop { place: Place<'tcx>, target: BasicBlock, unwind: Option }, - /// Drop the `Place` and assign the new value over it. This ensures - /// that the assignment to `P` occurs *even if* the destructor for - /// place unwinds. Its semantics are best explained by the - /// elaboration: + /// Drops the place and assigns a new value to it. + /// + /// This first performs the exact same operation as the pre drop-elaboration `Drop` terminator; + /// it then additionally assigns the `value` to the `place` as if by an assignment statement. + /// This assignment occurs both in the unwind and the regular code paths. The semantics are best + /// explained by the elaboration: /// /// ``` /// BB0 { @@ -170,7 +224,7 @@ pub enum TerminatorKind<'tcx> { /// } /// ``` /// - /// Note that DropAndReplace is eliminated as part of the `ElaborateDrops` pass. + /// Disallowed after drop elaboration. DropAndReplace { place: Place<'tcx>, value: Operand<'tcx>, @@ -178,7 +232,14 @@ pub enum TerminatorKind<'tcx> { unwind: Option, }, - /// Block ends with a call of a function. + /// Roughly speaking, evaluates the `func` operand and the arguments, and starts execution of + /// the referred to function. The operand types must match the argument types of the function. + /// The return place type must exactly match the return type. The type of the `func` operand + /// must be callable, meaning either a function pointer, a function type, or a closure type. + /// + /// **Needs clarification**: The exact semantics of this, see [#71117]. + /// + /// [#71117]: https://github.com/rust-lang/rust/issues/71117 Call { /// The function that’s being called. func: Operand<'tcx>, @@ -187,7 +248,7 @@ pub enum TerminatorKind<'tcx> { /// This allows the memory occupied by "by-value" arguments to be /// reused across function calls without duplicating the contents. args: Vec>, - /// Destination for the return value. If some, the call is converging. + /// Destination for the return value. 
If none, the call necessarily diverges. destination: Option<(Place<'tcx>, BasicBlock)>, /// Cleanups to be done if the call unwinds. cleanup: Option, @@ -199,8 +260,12 @@ pub enum TerminatorKind<'tcx> { fn_span: Span, }, - /// Jump to the target if the condition has the expected value, - /// otherwise panic with a message and a cleanup target. + /// Evaluates the operand, which must have type `bool`. If it is not equal to `expected`, + /// initiates a panic. Initiating a panic corresponds to a `Call` terminator with some + /// unspecified constant as the function to call, all the operands stored in the `AssertMessage` + /// as parameters, and `None` for the destination. Keep in mind that the `cleanup` path is not + /// necessarily executed even in the case of a panic, for example in `-C panic=abort`. If the + /// assertion does not fail, execution continues at the specified basic block. Assert { cond: Operand<'tcx>, expected: bool, @@ -209,7 +274,18 @@ pub enum TerminatorKind<'tcx> { cleanup: Option, }, - /// A suspend point. + /// Marks a suspend point. + /// + /// Like `Return` terminators in generator bodies, this computes `value` and then a + /// `GeneratorState::Yielded(value)` as if by `Aggregate` rvalue. That value is then assigned to + /// the return place of the function calling this one, and execution continues in the calling + /// function. When next invoked with the same first argument, execution of this function + /// continues at the `resume` basic block, with the second argument written to the `resume_arg` + /// place. If the generator is dropped before then, the `drop` basic block is invoked. + /// + /// Not permitted in bodies that are not generator bodies, or after generator lowering. + /// + /// **Needs clarification**: What about the evaluation order of the `resume_arg` and `value`? Yield { /// The value to return. 
value: Operand<'tcx>, @@ -221,11 +297,24 @@ pub enum TerminatorKind<'tcx> { drop: Option, }, - /// Indicates the end of the dropping of a generator. + /// Indicates the end of dropping a generator. + /// + /// Semantically just a `return` (from the generators drop glue). Only permitted in the same situations + /// as `yield`. + /// + /// **Needs clarification**: Is that even correct? The generator drop code is always confusing + /// to me, because it's not even really in the current body. + /// + /// **Needs clarification**: Are there type system constraints on these terminators? Should + /// there be a "block type" like `cleanup` blocks for them? GeneratorDrop, - /// A block where control flow only ever takes one real path, but borrowck - /// needs to be more conservative. + /// A block where control flow only ever takes one real path, but borrowck needs to be more + /// conservative. + /// + /// At runtime this is semantically just a goto. + /// + /// Disallowed after drop elaboration. FalseEdge { /// The target normal control flow will take. real_target: BasicBlock, @@ -233,9 +322,14 @@ pub enum TerminatorKind<'tcx> { /// practice. imaginary_target: BasicBlock, }, - /// A terminator for blocks that only take one path in reality, but where we - /// reserve the right to unwind in borrowck, even if it won't happen in practice. - /// This can arise in infinite loops with no function calls for example. + + /// A terminator for blocks that only take one path in reality, but where we reserve the right + /// to unwind in borrowck, even if it won't happen in practice. This can arise in infinite loops + /// with no function calls for example. + /// + /// At runtime this is semantically just a goto. + /// + /// Disallowed after drop elaboration. FalseUnwind { /// The target normal control flow will take. 
real_target: BasicBlock, From f2d7908ff761c25e642e45fcabc820318e4ecf92 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 25 Mar 2022 20:00:33 -0400 Subject: [PATCH 12/17] Adjust MIR validator to check a few more things for terminators --- .../src/transform/validate.rs | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index 7eb91385653e5..4358ec2fff728 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -642,6 +642,9 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { } } TerminatorKind::Yield { resume, drop, .. } => { + if self.body.generator.is_none() { + self.fail(location, "`Yield` cannot appear outside generator bodies"); + } if self.mir_phase >= MirPhase::GeneratorsLowered { self.fail(location, "`Yield` should have been replaced by generator lowering"); } @@ -681,6 +684,9 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { } } TerminatorKind::GeneratorDrop => { + if self.body.generator.is_none() { + self.fail(location, "`GeneratorDrop` cannot appear outside generator bodies"); + } if self.mir_phase >= MirPhase::GeneratorsLowered { self.fail( location, @@ -688,11 +694,19 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { ); } } - // Nothing to validate for these. 
- TerminatorKind::Resume - | TerminatorKind::Abort - | TerminatorKind::Return - | TerminatorKind::Unreachable => {} + TerminatorKind::Resume | TerminatorKind::Abort => { + let bb = location.block; + if !self.body.basic_blocks()[bb].is_cleanup { + self.fail(location, "Cannot `Resume` from non-cleanup basic block") + } + } + TerminatorKind::Return => { + let bb = location.block; + if self.body.basic_blocks()[bb].is_cleanup { + self.fail(location, "Cannot `Return` from cleanup basic block") + } + } + TerminatorKind::Unreachable => {} } self.super_terminator(terminator, location); From 1d318e42e75b959a45be28efcc5069408c228714 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 25 Mar 2022 20:08:35 -0400 Subject: [PATCH 13/17] Improve MIR phases documentation with summaries of changes --- compiler/rustc_middle/src/mir/mod.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index a01261c543ba6..4d124da43ecb5 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -127,12 +127,24 @@ pub trait MirPass<'tcx> { /// The various "big phases" that MIR goes through. /// /// These phases all describe dialects of MIR. Since all MIR uses the same datastructures, the -/// dialects forbid certain variants or values in certain phases. +/// dialects forbid certain variants or values in certain phases. The sections below summarize the +/// changes, but do not document them thoroughly. The full documentation is found in the appropriate +/// documentation for the thing the change is affecting. /// /// Warning: ordering of variants is significant. #[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(HashStable)] pub enum MirPhase { + /// The dialect of MIR used during all phases before `DropsLowered` is the same. This is also + /// the MIR that analysis such as borrowck uses. 
+ /// + /// One important thing to remember about the behavior of this section of MIR is that drop terminators + /// (including drop and replace) are *conditional*. The elaborate drops pass will then replace each + /// instance of a drop terminator with a nop, an unconditional drop, or a drop conditioned on a drop + /// flag. Of course, this means that it is important that the drop elaboration can accurately recognize + /// when things are initialized and when things are de-initialized. That means any code running on this + /// version of MIR must be sure to produce output that drop elaboration can reason about. See the + /// section on the drop terminators for more details. Built = 0, // FIXME(oli-obk): it's unclear whether we still need this phase (and its corresponding query). // We used to have this for pre-miri MIR based const eval. @@ -162,6 +174,16 @@ pub enum MirPhase { /// And the following variant is allowed: /// * [`StatementKind::SetDiscriminant`] Deaggregated = 4, + /// Before this phase, generators are in the "source code" form, featuring `yield` statements + /// and such. With this phase change, they are transformed into a proper state machine. Running + /// optimizations before this change can be potentially dangerous because the source code is to + /// some extent a "lie." In particular, `yield` terminators effectively make the value of all + /// locals visible to the caller. This means that dead store elimination before them, or code + /// motion across them, is not correct in general. This is also exacerbated by type checking + /// having pre-computed a list of the types that it thinks are ok to be live across a yield + /// point - this is necessary to decide eg whether autotraits are implemented. Introducing new + /// types across a yield point will lead to ICEs because of this.
+ /// /// Beginning with this phase, the following variants are disallowed: /// * [`TerminatorKind::Yield`](terminator::TerminatorKind::Yield) /// * [`TerminatorKind::GeneratorDrop](terminator::TerminatorKind::GeneratorDrop) From 411ae6f5ad7d3b4488f280b7bea8498eb1b0d041 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Sat, 26 Mar 2022 20:46:56 -0400 Subject: [PATCH 14/17] Address various comments and change some details around place to value conversions --- .../src/transform/validate.rs | 2 +- compiler/rustc_middle/src/mir/mod.rs | 63 +++++++++---------- compiler/rustc_middle/src/mir/terminator.rs | 8 ++- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index 4358ec2fff728..f153c613f8c51 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -697,7 +697,7 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { TerminatorKind::Resume | TerminatorKind::Abort => { let bb = location.block; if !self.body.basic_blocks()[bb].is_cleanup { - self.fail(location, "Cannot `Resume` from non-cleanup basic block") + self.fail(location, "Cannot `Resume` or `Abort` from non-cleanup basic block") } } TerminatorKind::Return => { diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 4d124da43ecb5..0fd83942f20a4 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1653,16 +1653,15 @@ pub enum StatementKind<'tcx> { /// statements are not required. If the entire MIR body contains no `StorageLive`/`StorageDead` /// statements for a particular local, the local is always considered live. /// - /// More precisely, the MIR validator currently does a `MaybeLiveLocals` analysis to check - /// validity of each use of a local. 
I believe this is equivalent to requiring for every use of - /// a local, there exist at least one path from the root to that use that contains a + /// More precisely, the MIR validator currently does a `MaybeStorageLiveLocals` analysis to + /// check validity of each use of a local. I believe this is equivalent to requiring for every + /// use of a local, there exist at least one path from the root to that use that contains a /// `StorageLive` more recently than a `StorageDead`. /// - /// **Needs clarification**: Is it permitted to `StorageLive` a local for which we previously - /// executed `StorageDead`? How about two `StorageLive`s without an intervening `StorageDead`? - /// Two `StorageDead`s without an intervening `StorageLive`? LLVM says yes, poison, yes. If the - /// answer to any of these is "no," is breaking that rule UB or is it an error to have a path in - /// the CFG that might do this? + /// **Needs clarification**: Is it permitted to have two `StorageLive`s without an intervening + /// `StorageDead`? Two `StorageDead`s without an intervening `StorageLive`? LLVM says poison, + /// yes. If the answer to any of these is "no," is breaking that rule UB or is it an error to + /// have a path in the CFG that might do this? StorageLive(Local), /// See `StorageLive` above. @@ -1675,7 +1674,7 @@ pub enum StatementKind<'tcx> { /// for /// more details. /// - /// For code that is not specific to stacked borrows, you should consider statements to read + /// For code that is not specific to stacked borrows, you should consider retags to read /// and modify the place in an opaque way. Retag(RetagKind, Box>), @@ -1704,7 +1703,7 @@ pub enum StatementKind<'tcx> { /// executed. Coverage(Box), - /// Denotes a call to the intrinsic function `copy_overlapping`. + /// Denotes a call to the intrinsic function `copy_nonoverlapping`. /// /// First, all three operands are evaluated. `src` and `dest` must each be a reference, pointer, /// or `Box` pointing to the same type `T`. 
`count` must evaluate to a `usize`. Then, `src` and @@ -1919,7 +1918,7 @@ pub struct CopyNonOverlapping<'tcx> { /// **Needs clarification**: What about metadata resulting from dereferencing wide pointers (and /// possibly from accessing unsized locals - not sure how those work)? That probably deserves to go /// on the list above and be discussed too. It is also probably necessary for making the indexing -/// stuff lass hand-wavey. +/// stuff less hand-wavey. /// /// **Needs clarification**: When it says "part of memory" what does that mean precisely, and how /// does it interact with the metadata? @@ -2334,13 +2333,13 @@ pub struct SourceScopeLocalData { /// /// The most common way to create values is via a place to value conversion. A place to value /// conversion is an operation which reads the memory of the place and converts it to a value. This -/// is a fundamentally *typed* operation. Different types will do different things. These are some -/// possible examples of what Rust may - but will not necessarily - decide to do on place to value -/// conversions: +/// is a fundamentally *typed* operation. The nature of the value produced depends on the type of +/// the conversion. Furthermore, there may be other effects: if the type has a validity constraint +/// the place to value conversion might be UB if the validity constraint is not met. /// -/// 1. Types with validity constraints cause UB if the validity constraint is not met -/// 2. References/pointers may have their provenance change or cause other provenance related -/// side-effects. +/// **Needs clarification:** Ralf proposes that place to value conversions not have side-effects. +/// This is what is implemented in miri today. Are these the semantics we want for MIR? Is this +/// something we can even decide without knowing more about Rust's memory model? /// /// A place to value conversion on a place that has its variant index set is not well-formed. 
/// However, note that this rule only applies to places appearing in MIR bodies. Many functions, @@ -2472,15 +2471,17 @@ impl<'tcx> Operand<'tcx> { /// /// Not all of these are allowed at every [`MirPhase`] - when this is the case, it's stated below. /// -/// Computing any rvalue begins by evaluating the places and operands in the rvalue in the order in -/// which they appear. These are then used to produce a "value" - the same kind of value that an -/// [`Operand`] is. +/// Computing any rvalue begins by evaluating the places and operands in some order (**Needs +/// clarification**: Which order?). These are then used to produce a "value" - the same kind of +/// value that an [`Operand`] is. pub enum Rvalue<'tcx> { /// Yields the operand unchanged Use(Operand<'tcx>), - /// Creates an array where each element is the value of the operand. This currently does not - /// drop the value even if the number of repetitions is zero, see [#74836]. + /// Creates an array where each element is the value of the operand. + /// + /// This is the cause of a bug in the case where the repetition count is zero because the value + /// is not dropped, see [#74836]. /// /// Corresponds to source code like `[x; 32]`. /// @@ -2534,12 +2535,12 @@ pub enum Rvalue<'tcx> { Cast(CastKind, Operand<'tcx>, Ty<'tcx>), /// * `Offset` has the same semantics as [`offset`](pointer::offset), except that the second - /// paramter may be a `usize` as well. + /// parameter may be a `usize` as well. /// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats, /// raw pointers, or function pointers and return a `bool`. /// * Left and right shift operations accept signed or unsigned integers not necessarily of the /// same type and return a value of the same type as their LHS. For all other operations, the - /// types of the operands must match. + /// types of the operands must match. Like in Rust, the RHS is truncated as needed. 
/// * The `Bit*` operations accept signed integers, unsigned integers, or bools and return a /// value of that type. /// * The remaining operations accept signed integers, unsigned integers, or floats of any @@ -2548,21 +2549,19 @@ pub enum Rvalue<'tcx> { /// Same as `BinaryOp`, but yields `(T, bool)` instead of `T`. In addition to performing the /// same computation as the matching `BinaryOp`, checks if the infinite precison result would be - /// unequal to the actual result and sets the `bool` if this is the case. `BinOp::Offset` is not - /// allowed here. + /// unequal to the actual result and sets the `bool` if this is the case. /// - /// **FIXME**: What about division/modulo? Are they allowed here at all? Are zero divisors still - /// UB? Also, which other combinations of types are disallowed? + /// This only supports addition, subtraction, multiplication, and shift operations. CheckedBinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>), - /// Yields the size or alignment of the type as a `usize`. + /// Computes a value as described by the operation. NullaryOp(NullOp, Ty<'tcx>), /// Exactly like `BinaryOp`, but less operands. /// - /// Also does two's-complement arithmetic. Negation requires a signed integer or a float; binary - /// not requires a signed integer, unsigned integer, or bool. Both operation kinds return a - /// value with the same type as their operand. + /// Also does two's-complement arithmetic. Negation requires a signed integer or a float; + /// bitwise not requires a signed integer, unsigned integer, or bool. Both operation kinds + /// return a value with the same type as their operand. 
UnaryOp(UnOp, Operand<'tcx>), /// Computes the discriminant of the place, returning it as an integer of type diff --git a/compiler/rustc_middle/src/mir/terminator.rs b/compiler/rustc_middle/src/mir/terminator.rs index bf68835235da6..cc08857463d58 100644 --- a/compiler/rustc_middle/src/mir/terminator.rs +++ b/compiler/rustc_middle/src/mir/terminator.rs @@ -234,10 +234,12 @@ pub enum TerminatorKind<'tcx> { /// Roughly speaking, evaluates the `func` operand and the arguments, and starts execution of /// the referred to function. The operand types must match the argument types of the function. - /// The return place type must exactly match the return type. The type of the `func` operand - /// must be callable, meaning either a function pointer, a function type, or a closure type. + /// The return place type must match the return type. The type of the `func` operand must be + /// callable, meaning either a function pointer, a function type, or a closure type. /// - /// **Needs clarification**: The exact semantics of this, see [#71117]. + /// **Needs clarification**: The exact semantics of this. Current backends rely on `move` + /// operands not aliasing the return place. It is unclear how this is justified in MIR, see + /// [#71117]. 
/// /// [#71117]: https://github.com/rust-lang/rust/issues/71117 Call { From 4bce639c3b98ceb9e8a8896f9d4a7f7d6db79ba1 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 8 Apr 2022 15:53:08 -0400 Subject: [PATCH 15/17] Add more clarifications in response to Ralf's comments --- compiler/rustc_middle/src/mir/mod.rs | 186 ++++++++++++--------------- 1 file changed, 84 insertions(+), 102 deletions(-) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 0fd83942f20a4..c690d2b9d334d 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1598,9 +1598,9 @@ pub enum StatementKind<'tcx> { /// Assign statements roughly correspond to an assignment in Rust proper (`x = ...`) except /// without the possibility of dropping the previous value (that must be done separately, if at /// all). The *exact* way this works is undecided. It probably does something like evaluating - /// the LHS and RHS, and then doing the inverse of a place to value conversion to write the - /// resulting value into memory. Various parts of this may do type specific things that are more - /// complicated than simply copying over the bytes depending on the types. + /// the LHS to a place and the RHS to a value, and then storing the value to the place. Various + /// parts of this may do type specific things that are more complicated than simply copying + /// bytes. /// /// **Needs clarification**: The implication of the above idea would be that assignment implies /// that the resulting value is initialized. I believe we could commit to this separately from @@ -1615,8 +1615,9 @@ pub enum StatementKind<'tcx> { /// interesting for optimizations? Do we want to allow such optimizations? /// /// **Needs clarification**: We currently require that the LHS place not overlap with any place - /// read as part of computation of the RHS. This requirement is under discussion in [#68364]. 
As - /// a part of this discussion, it is also unclear in what order the components are evaluated. + /// read as part of computation of the RHS for some rvalues (generally those not producing + /// primitives). This requirement is under discussion in [#68364]. As a part of this discussion, + /// it is also unclear in what order the components are evaluated. /// /// [#68364]: https://github.com/rust-lang/rust/issues/68364 /// @@ -1714,9 +1715,8 @@ pub enum StatementKind<'tcx> { /// **Needs clarification**: In what order are operands computed and dereferenced? It should /// probably match the order for assignment, but that is also undecided. /// - /// **Needs clarification**: Is this typed or not, ie is there a place to value and back - /// conversion involved? I vaguely remember Ralf saying somewhere that he thought it should not - /// be. + /// **Needs clarification**: Is this typed or not, ie is there a typed load and store involved? + /// I vaguely remember Ralf saying somewhere that he thought it should not be. CopyNonOverlapping(Box>), /// No-op. Useful for deleting instructions without affecting statement indices. @@ -1868,41 +1868,55 @@ pub struct CopyNonOverlapping<'tcx> { /// Places roughly correspond to a "location in memory." Places in MIR are the same mathematical /// object as places in Rust. This of course means that what exactly they are is undecided and part -/// of the Rust memory model. However, they will likely contain at least the following three pieces -/// of information in some form: +/// of the Rust memory model. However, they will likely contain at least the following pieces of +/// information in some form: /// -/// 1. The part of memory that is referred to (see discussion below for details). -/// 2. The type of the place and an optional variant index. See [`PlaceTy`][tcx::PlaceTy] -/// 3. The provenance with which the place is being accessed. +/// 1. The address in memory that the place refers to. +/// 2. 
The provenance with which the place is being accessed. +/// 3. The type of the place and an optional variant index. See [`PlaceTy`][tcx::PlaceTy]. +/// 4. Optionally, some metadata. This exists if and only if the type of the place is not `Sized`. /// -/// We'll give a description below of how the first two of these three properties are computed for a -/// place. We cannot give a description of the provenance, because that is part of the undecided -/// aliasing model - we only include it here at all to acknowledge its existence. +/// We'll give a description below of how all pieces of the place except for the provenance are +/// calculated. We cannot give a description of the provenance, because that is part of the +/// undecided aliasing model - we only include it here at all to acknowledge its existence. /// -/// For a place that has no projections, ie `Place { local, projection: [] }`, the part of memory is -/// the local's full allocation and the type is the type of the local. For any other place, we -/// define the values as a function of the parent place, that is the place with its last -/// [`ProjectionElem`] stripped. The way this is computed of course depends on the kind of that last -/// projection element: +/// Each local naturally corresponds to the place `Place { local, projection: [] }`. This place has +/// the address of the local's allocation and the type of the local. +/// +/// **Needs clarification:** Unsized locals seem to present a bit of an issue. Their allocation +/// can't actually be created on `StorageLive`, because it's unclear how big to make the allocation. +/// Furthermore, MIR produces assignments to unsized locals, although that is not permitted under +/// `#![feature(unsized_locals)]` in Rust. 
Besides just putting "unsized locals are special and +/// different" in a bunch of places, I (JakobDegen) don't know how to incorporate this behavior into +/// the current MIR semantics in a clean way - possibly this needs some design work first. +/// +/// For places that are not locals, ie they have a non-empty list of projections, we define the +/// values as a function of the parent place, that is the place with its last [`ProjectionElem`] +/// stripped. The way this is computed of course depends on the kind of that last projection +/// element: /// /// - [`Downcast`](ProjectionElem::Downcast): This projection sets the place's variant index to the /// given one, and makes no other changes. A `Downcast` projection on a place with its variant /// index already set is not well-formed. /// - [`Field`](ProjectionElem::Field): `Field` projections take their parent place and create a -/// place referring to one of the fields of the type. The referred to place in memory is where -/// the layout places the field. The type becomes the type of the field. +/// place referring to one of the fields of the type. The resulting address is the parent +/// address, plus the offset of the field. The type becomes the type of the field. If the parent +/// was unsized and so had metadata associated with it, then the metadata is retained if the +/// field is unsized and thrown out if it is sized. /// /// These projections are only legal for tuples, ADTs, closures, and generators. If the ADT or /// generator has more than one variant, the parent place's variant index must be set, indicating /// which variant is being used. If it has just one variant, the variant index may or may not be /// included - the single possible variant is inferred if it is not included. /// - [`ConstantIndex`](ProjectionElem::ConstantIndex): Computes an offset in units of `T` into the -/// place as described in the documentation for the `ProjectionElem`. 
The resulting part of -/// memory is the location of that element of the array/slice, and the type is `T`. This is only -/// legal if the parent place has type `[T; N]` or `[T]` (*not* `&[T]`). -/// - [`Subslice`](ProjectionElem::Subslice): Much like `ConstantIndex`. It is also only legal on -/// `[T; N]` and `[T]`. However, this yields a `Place` of type `[T]`, and may refer to more than -/// one element in the parent place. +/// place as described in the documentation for the `ProjectionElem`. The resulting address is +/// the parent's address plus that offset, and the type is `T`. This is only legal if the parent +/// place has type `[T; N]` or `[T]` (*not* `&[T]`). Since such a `T` is always sized, any +/// resulting metadata is thrown out. +/// - [`Subslice`](ProjectionElem::Subslice): This projection calculates an offset and a new +/// address in a similar manner as `ConstantIndex`. It is also only legal on `[T; N]` and `[T]`. +/// However, this yields a `Place` of type `[T]`, and additionally sets the metadata to be the +/// length of the subslice. /// - [`Index`](ProjectionElem::Index): Like `ConstantIndex`, only legal on `[T; N]` or `[T]`. /// However, `Index` additionally takes a local from which the value of the index is computed at /// runtime. Computing the value of the index involves interpreting the `Local` as a @@ -1911,53 +1925,23 @@ pub struct CopyNonOverlapping<'tcx> { /// have type `usize`. /// - [`Deref`](ProjectionElem::Deref): Derefs are the last type of projection, and the most /// complicated. They are only legal on parent places that are references, pointers, or `Box`. A -/// `Deref` projection begins by creating a value from the parent place, as if by +/// `Deref` projection begins by loading a value from the parent place, as if by /// [`Operand::Copy`]. It then dereferences the resulting pointer, creating a place of the -/// pointed to type. 
-/// -/// **Needs clarification**: What about metadata resulting from dereferencing wide pointers (and -/// possibly from accessing unsized locals - not sure how those work)? That probably deserves to go -/// on the list above and be discussed too. It is also probably necessary for making the indexing -/// stuff less hand-wavey. -/// -/// **Needs clarification**: When it says "part of memory" what does that mean precisely, and how -/// does it interact with the metadata? +/// pointee's type. The resulting address is the address that was stored in the pointer. If the +/// pointee type is unsized, the pointer additionally stored the value of the metadata. /// -/// One possible model that I believe makes sense is that "part of memory" is actually just the -/// address of the beginning of the referred to range of bytes. For sized types, the size of the -/// range is then stored in the type, and for unsized types it's stored (possibly indirectly, -/// through a vtable) in the metadata. +/// Computing a place may cause UB. One possibility is that the pointer used for a `Deref` may not +/// be suitably aligned. Another possibility is that the place is not in bouns, meaning it does not +/// point to an actual allocation. /// -/// Alternatively, the "part of memory" could be a whole range of bytes. Initially seemed more -/// natural to me, but seems like it falls apart after a little bit. -/// -/// More likely though, we should call this detail a part of the Rust memory model and let that deal -/// with the precise definition of this part of a place. If we feel strongly, I don't think we *have -/// to* though. MIR places are more flexible than Rust places, and we might be able to make a -/// decision on the flexible parts without semi-stabilizing the source language. 
(end NC) -/// -/// Computing a place may be UB - this is certainly the case with dereferencing, which requires -/// sufficient provenance, but it may additionally be the case for some of the other field -/// projections. -/// -/// It is undecided when this UB kicks in. As best I can tell that is the question being discussed -/// in [UCG#319]. Summarizing from that thread, I believe the options are: +/// However, if this is actually UB and when the UB kicks in is undecided. This is being discussed +/// in [UCG#319]. The options include that every place must obey those rules, that only some places +/// must obey them, or that places impose no rules of their own. /// /// [UCG#319]: https://github.com/rust-lang/unsafe-code-guidelines/issues/319 /// -/// 1. Each intermediate place must have provenance for the whole part of memory it refers to. This -/// is the status quo. -/// 2. Only for intermediate place where the last projection was *not* a deref. This corresponds to -/// "Check inbounds on place projection". -/// 3. Only on place to value conversions, assignments, and referencing operation. This corresponds -/// to "remove the restrictions from `*` entirely." -/// 4. On each intermediate place if the place is used for a place to value conversion as part of -/// an assignment assignment or it is used for a referencing operation. For a raw pointer -/// computation, never. This corresponds to "magic?". -/// -/// Hopefully I am not misrepresenting anyone's opinions - please let me know if I am. Currently, -/// Rust chooses option 1. This is checked by MIRI and taken advantage of by codegen (via `gep -/// inbounds`). That is possibly subject to change. +/// Rust currently requires that every place obey those two rules. This is checked by MIRI and taken +/// advantage of by codegen (via `gep inbounds`). That is possibly subject to change. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, TyEncodable, HashStable)] pub struct Place<'tcx> { pub local: Local, @@ -2331,32 +2315,30 @@ pub struct SourceScopeLocalData { /// /// [value-def]: https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/value-domain.md /// -/// The most common way to create values is via a place to value conversion. A place to value -/// conversion is an operation which reads the memory of the place and converts it to a value. This -/// is a fundamentally *typed* operation. The nature of the value produced depends on the type of -/// the conversion. Furthermore, there may be other effects: if the type has a validity constraint -/// the place to value conversion might be UB if the validity constraint is not met. +/// The most common way to create values is via loading a place. Loading a place is an operation +/// which reads the memory of the place and converts it to a value. This is a fundamentally *typed* +/// operation. The nature of the value produced depends on the type of the conversion. Furthermore, +/// there may be other effects: if the type has a validity constraint loading the place might be UB +/// if the validity constraint is not met. /// -/// **Needs clarification:** Ralf proposes that place to value conversions not have side-effects. +/// **Needs clarification:** Ralf proposes that loading a place not have side-effects. /// This is what is implemented in miri today. Are these the semantics we want for MIR? Is this /// something we can even decide without knowing more about Rust's memory model? /// -/// A place to value conversion on a place that has its variant index set is not well-formed. -/// However, note that this rule only applies to places appearing in MIR bodies. Many functions, -/// such as [`Place::ty`], still accept such a place. If you write a function for which it might be -/// ambiguous whether such a thing is accepted, make sure to document your choice clearly. 
+/// Loading a place that has its variant index set is not well-formed. However, note that this rule +/// only applies to places appearing in MIR bodies. Many functions, such as [`Place::ty`], still +/// accept such a place. If you write a function for which it might be ambiguous whether such a +/// thing is accepted, make sure to document your choice clearly. #[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] pub enum Operand<'tcx> { - /// Creates a value by performing a place to value conversion at the given place. The type of - /// the place must be `Copy` + /// Creates a value by loading the given place. The type of the place must be `Copy` Copy(Place<'tcx>), - /// Creates a value by performing a place to value conversion for the place, just like the - /// `Copy` operand. + /// Creates a value by loading the place, just like the `Copy` operand. /// /// This *may* additionally overwrite the place with `uninit` bytes, depending on how we decide - /// in [UCG#188]. You should not emit MIR that may attempt a subsequent second place to value - /// conversion on this place without first re-initializing it. + /// in [UCG#188]. You should not emit MIR that may attempt a subsequent second load of this + /// place without first re-initializing it. /// /// [UCG#188]: https://github.com/rust-lang/unsafe-code-guidelines/issues/188 Move(Place<'tcx>), @@ -2473,7 +2455,7 @@ impl<'tcx> Operand<'tcx> { /// /// Computing any rvalue begins by evaluating the places and operands in some order (**Needs /// clarification**: Which order?). These are then used to produce a "value" - the same kind of -/// value that an [`Operand`] is. +/// value that an [`Operand`] produces. pub enum Rvalue<'tcx> { /// Yields the operand unchanged Use(Operand<'tcx>), @@ -2497,14 +2479,14 @@ pub enum Rvalue<'tcx> { /// `Shallow` borrows are disallowed after drop lowering. 
Ref(Region<'tcx>, BorrowKind, Place<'tcx>), - /// Returns a pointer/reference to the given thread local. + /// Creates a pointer/reference to the given thread local. /// /// The yielded type is a `*mut T` if the static is mutable, otherwise if the static is extern a /// `*const T`, and if neither of those apply a `&T`. /// /// **Note:** This is a runtime operation that actually executes code and is in this sense more - /// like a function call. Also, DSEing these causes `fn main() {}` to SIGILL for some reason - /// that I never got a chance to look into. + /// like a function call. Also, eliminating dead stores of this rvalue causes `fn main() {}` to + /// SIGILL for some reason that I (JakobDegen) never got a chance to look into. /// /// **Needs clarification**: Are there weird additional semantics here related to the runtime /// nature of this operation? @@ -2521,9 +2503,9 @@ pub enum Rvalue<'tcx> { /// Yields the length of the place, as a `usize`. /// - /// If the type of the place is an array, this is the array length. This also works for slices - /// (`[T]`, not `&[T]`) through some mechanism that depends on how exactly places work (see - /// there for more details). + /// If the type of the place is an array, this is the array length. For slices (`[T]`, not + /// `&[T]`) this accesses the place's metadata to determine the length. This rvalue is + /// ill-formed for places of other types. Len(Place<'tcx>), /// Performs essentially all of the casts that can be performed via `as`. @@ -2537,21 +2519,21 @@ pub enum Rvalue<'tcx> { /// * `Offset` has the same semantics as [`offset`](pointer::offset), except that the second /// parameter may be a `usize` as well. /// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats, - /// raw pointers, or function pointers and return a `bool`. + /// raw pointers, or function pointers of matching types and return a `bool`. 
/// * Left and right shift operations accept signed or unsigned integers not necessarily of the - /// same type and return a value of the same type as their LHS. For all other operations, the - /// types of the operands must match. Like in Rust, the RHS is truncated as needed. - /// * The `Bit*` operations accept signed integers, unsigned integers, or bools and return a - /// value of that type. - /// * The remaining operations accept signed integers, unsigned integers, or floats of any - /// matching type and return a value of that type. + /// same type and return a value of the same type as their LHS. Like in Rust, the RHS is + /// truncated as needed. + /// * The `Bit*` operations accept signed integers, unsigned integers, or bools with matching + /// types and return a value of that type. + /// * The remaining operations accept signed integers, unsigned integers, or floats with + /// matching types and return a value of that type. BinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>), /// Same as `BinaryOp`, but yields `(T, bool)` instead of `T`. In addition to performing the /// same computation as the matching `BinaryOp`, checks if the infinite precison result would be /// unequal to the actual result and sets the `bool` if this is the case. /// - /// This only supports addition, subtraction, multiplication, and shift operations. + /// This only supports addition, subtraction, multiplication, and shift operations on integers. CheckedBinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>), /// Computes a value as described by the operation. @@ -2592,7 +2574,7 @@ pub enum Rvalue<'tcx> { /// Transmutes a `*mut u8` into shallow-initialized `Box`. /// - /// This is different a normal transmute because dataflow analysis will treat the box as + /// This is different from a normal transmute because dataflow analysis will treat the box as /// initialized but its content as uninitialized. Like other pointer casts, this in general /// affects alias analysis. 
/// From 8732bf5db372e3b9297e854ce71851bbc6a90893 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Sat, 9 Apr 2022 10:00:19 -0400 Subject: [PATCH 16/17] Remove rule that place loads may not happen with variant index set --- compiler/rustc_const_eval/src/transform/validate.rs | 7 ++----- compiler/rustc_middle/src/mir/mod.rs | 9 ++++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index f153c613f8c51..01af95851357e 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -246,12 +246,9 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { self.super_projection_elem(local, proj_base, elem, context, location); } - fn visit_place(&mut self, place: &Place<'tcx>, _: PlaceContext, location: Location) { + fn visit_place(&mut self, place: &Place<'tcx>, _: PlaceContext, _: Location) { // Set off any `bug!`s in the type computation code - let ty = place.ty(&self.body.local_decls, self.tcx); - if ty.variant_index.is_some() { - self.fail(location, "Top level places may not have their variant index set!"); - } + let _ = place.ty(&self.body.local_decls, self.tcx); } fn visit_rvalue(&mut self, rvalue: &Rvalue<'tcx>, location: Location) { diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index c690d2b9d334d..9f7832c8a64a2 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1931,7 +1931,7 @@ pub struct CopyNonOverlapping<'tcx> { /// pointee type is unsized, the pointer additionally stored the value of the metadata. /// /// Computing a place may cause UB. One possibility is that the pointer used for a `Deref` may not -/// be suitably aligned. Another possibility is that the place is not in bouns, meaning it does not +/// be suitably aligned. 
Another possibility is that the place is not in bounds, meaning it does not /// point to an actual allocation. /// /// However, if this is actually UB and when the UB kicks in is undecided. This is being discussed @@ -2325,10 +2325,9 @@ pub struct SourceScopeLocalData { /// This is what is implemented in miri today. Are these the semantics we want for MIR? Is this /// something we can even decide without knowing more about Rust's memory model? /// -/// Loading a place that has its variant index set is not well-formed. However, note that this rule -/// only applies to places appearing in MIR bodies. Many functions, such as [`Place::ty`], still -/// accept such a place. If you write a function for which it might be ambiguous whether such a -/// thing is accepted, make sure to document your choice clearly. +/// **Needs clarification:** Is loading a place that has its variant index set well-formed? Miri +/// currently implements it, but it seems like this may be something to check against in the +/// validator. #[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] pub enum Operand<'tcx> { /// Creates a value by loading the given place. 
The type of the place must be `Copy` From 29c41280a123163827d76b82f251965975c468e8 Mon Sep 17 00:00:00 2001 From: Takayuki Maeda Date: Sun, 10 Apr 2022 20:39:02 +0900 Subject: [PATCH 17/17] use `to_string` instead of `format!` --- compiler/rustc_parse/src/parser/item.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 5bf6f22b5d064..ca81921faedcc 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -970,7 +970,7 @@ impl<'a> Parser<'a> { } if fixed_crate_name { let fixed_name_sp = ident.span.to(idents.last().unwrap().span); - let mut fixed_name = format!("{}", ident.name); + let mut fixed_name = ident.name.to_string(); for part in idents { fixed_name.push_str(&format!("_{}", part.name)); }