Skip to content

Commit

Permalink
Rollup merge of rust-lang#111543 - Urgau:uplift_invalid_utf8_in_unche…
Browse files Browse the repository at this point in the history
…cked, r=WaffleLapkin

Uplift `clippy::invalid_utf8_in_unchecked` lint

This PR aims at uplifting the `clippy::invalid_utf8_in_unchecked` lint into two lints.

## `invalid_from_utf8_unchecked`

(deny-by-default)

The `invalid_from_utf8_unchecked` lint checks for calls to `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut` with an invalid UTF-8 literal.

### Example

```rust
unsafe {
    std::str::from_utf8_unchecked(b"cl\x82ippy");
}
```

### Explanation

Creating such a `str` would result in undefined behavior as per documentation for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.

## `invalid_from_utf8`

(warn-by-default)

The `invalid_from_utf8` lint checks for calls to `std::str::from_utf8` and `std::str::from_utf8_mut` with an invalid UTF-8 literal.

### Example

```rust
std::str::from_utf8(b"ru\x82st");
```

### Explanation

Trying to create such a `str` would always return an error as per documentation for `std::str::from_utf8` and `std::str::from_utf8_mut`.

-----

Mostly followed the instructions for uplifting a clippy lint described here: rust-lang#99696 (review)

`@rustbot` label: +I-lang-nominated
r? compiler

-----

For Clippy:

changelog: Moves: Uplifted `clippy::invalid_utf8_in_unchecked` into rustc
  • Loading branch information
Noratrieb authored May 30, 2023
2 parents bbee221 + b84c190 commit e18b026
Show file tree
Hide file tree
Showing 19 changed files with 423 additions and 168 deletions.
8 changes: 8 additions & 0 deletions compiler/rustc_lint/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,14 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
lint_lintpass_by_hand = implementing `LintPass` by hand
.help = try using `declare_lint_pass!` or `impl_lint_pass!` instead
Expand Down
118 changes: 118 additions & 0 deletions compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::str::Utf8Error;

use rustc_ast::{BorrowKind, LitKind};
use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;

use crate::lints::InvalidFromUtf8Diag;
use crate::{LateContext, LateLintPass, LintContext};

declare_lint! {
/// The `invalid_from_utf8_unchecked` lint checks for calls to
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust,compile_fail
/// # #[allow(unused)]
/// unsafe {
/// std::str::from_utf8_unchecked(b"Ru\x82st");
/// }
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Creating such a `str` would result in undefined behavior as per documentation
/// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
pub INVALID_FROM_UTF8_UNCHECKED,
Deny,
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}

declare_lint! {
/// The `invalid_from_utf8` lint checks for calls to
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust
/// # #[allow(unused)]
/// std::str::from_utf8(b"Ru\x82st");
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Trying to create such a `str` would always return an error as per documentation
/// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
pub INVALID_FROM_UTF8,
Warn,
"using a non UTF-8 literal in `std::str::from_utf8`"
}

declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);

impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
if let ExprKind::Call(path, [arg]) = expr.kind
&& let ExprKind::Path(ref qpath) = path.kind
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
{
let lint = |utf8_error: Utf8Error| {
let label = arg.span;
let method = diag_item.as_str().strip_prefix("str_").unwrap();
let method = format!("std::str::{method}");
let valid_up_to = utf8_error.valid_up_to();
let is_unchecked_variant = diag_item.as_str().contains("unchecked");

cx.emit_spanned_lint(
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
expr.span,
if is_unchecked_variant {
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
} else {
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
}
)
};

match &arg.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes)
{
lint(utf8_error);
}
},
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
let elements = args.iter().map(|e|{
match &e.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
LitKind::Byte(b) => Some(*b),
LitKind::Int(b, _) => Some(*b as u8),
_ => None
}
_ => None
}
}).collect::<Option<Vec<_>>>();

if let Some(elements) = elements
&& let Err(utf8_error) = std::str::from_utf8(&elements)
{
lint(utf8_error);
}
}
_ => {}
}
}
}
}
3 changes: 3 additions & 0 deletions compiler/rustc_lint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ mod expect;
mod for_loops_over_fallibles;
pub mod hidden_unicode_codepoints;
mod internal;
mod invalid_from_utf8;
mod late;
mod let_underscore;
mod levels;
Expand Down Expand Up @@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
use for_loops_over_fallibles::*;
use hidden_unicode_codepoints::*;
use internal::*;
use invalid_from_utf8::*;
use let_underscore::*;
use map_unit_fn::*;
use methods::*;
Expand Down Expand Up @@ -207,6 +209,7 @@ late_lint_methods!(
HardwiredLints: HardwiredLints,
ImproperCTypesDeclarations: ImproperCTypesDeclarations,
ImproperCTypesDefinitions: ImproperCTypesDefinitions,
InvalidFromUtf8: InvalidFromUtf8,
VariantSizeDifferences: VariantSizeDifferences,
BoxPointers: BoxPointers,
PathStatements: PathStatements,
Expand Down
19 changes: 19 additions & 0 deletions compiler/rustc_lint/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,25 @@ pub struct ForgetCopyDiag<'a> {
pub label: Span,
}

// invalid_from_utf8.rs
#[derive(LintDiagnostic)]
pub enum InvalidFromUtf8Diag {
#[diag(lint_invalid_from_utf8_unchecked)]
Unchecked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
#[diag(lint_invalid_from_utf8_checked)]
Checked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
}

// hidden_unicode_codepoints.rs
#[derive(LintDiagnostic)]
#[diag(lint_hidden_unicode_codepoints)]
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,10 @@ symbols! {
stop_after_dataflow,
store,
str,
str_from_utf8,
str_from_utf8_mut,
str_from_utf8_unchecked,
str_from_utf8_unchecked_mut,
str_split_whitespace,
str_trim,
str_trim_end,
Expand Down
2 changes: 2 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![cfg_attr(not(bootstrap), allow(invalid_from_utf8))]

use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::cmp::Ordering::{Equal, Greater, Less};
Expand Down
4 changes: 4 additions & 0 deletions library/core/src/str/converts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ use super::Utf8Error;
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
#[rustc_allow_const_fn_unstable(str_internals)]
#[rustc_diagnostic_item = "str_from_utf8"]
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// FIXME: This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down Expand Up @@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
/// errors that can be returned.
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
#[rustc_diagnostic_item = "str_from_utf8_mut"]
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down Expand Up @@ -167,6 +169,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
// Also relies on `&str` and `&[u8]` having the same layout.
Expand Down Expand Up @@ -194,6 +197,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
#[must_use]
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
// SAFETY: the caller must guarantee that the bytes `v`
// are valid UTF-8, thus the cast to `*mut str` is safe.
Expand Down
1 change: 0 additions & 1 deletion src/tools/clippy/clippy_lints/src/declared_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::instant_subtraction::UNCHECKED_DURATION_SUBTRACTION_INFO,
crate::int_plus_one::INT_PLUS_ONE_INFO,
crate::invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS_INFO,
crate::invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED_INFO,
crate::items_after_statements::ITEMS_AFTER_STATEMENTS_INFO,
crate::items_after_test_module::ITEMS_AFTER_TEST_MODULE_INFO,
crate::iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR_INFO,
Expand Down
74 changes: 0 additions & 74 deletions src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/tools/clippy/clippy_lints/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ mod inline_fn_without_body;
mod instant_subtraction;
mod int_plus_one;
mod invalid_upcast_comparisons;
mod invalid_utf8_in_unchecked;
mod items_after_statements;
mod items_after_test_module;
mod iter_not_returning_iterator;
Expand Down Expand Up @@ -937,7 +936,6 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
store.register_late_pass(move |_| Box::new(manual_retain::ManualRetain::new(msrv())));
let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold;
store.register_late_pass(move |_| Box::new(operators::Operators::new(verbose_bit_mask_threshold)));
store.register_late_pass(|_| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked));
store.register_late_pass(|_| Box::<std_instead_of_core::StdReexports>::default());
store.register_late_pass(move |_| Box::new(instant_subtraction::InstantSubtraction::new(msrv())));
store.register_late_pass(|_| Box::new(partialeq_to_none::PartialeqToNone));
Expand Down
1 change: 1 addition & 0 deletions src/tools/clippy/clippy_lints/src/renamed_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub static RENAMED_LINTS: &[(&str, &str)] = &[
("clippy::into_iter_on_array", "array_into_iter"),
("clippy::invalid_atomic_ordering", "invalid_atomic_ordering"),
("clippy::invalid_ref", "invalid_value"),
("clippy::invalid_utf8_in_unchecked", "invalid_from_utf8_unchecked"),
("clippy::let_underscore_drop", "let_underscore_drop"),
("clippy::mem_discriminant_non_enum", "enum_intrinsics_non_enums"),
("clippy::panic_params", "non_fmt_panics"),
Expand Down
20 changes: 0 additions & 20 deletions src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.rs

This file was deleted.

22 changes: 0 additions & 22 deletions src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.stderr

This file was deleted.

2 changes: 2 additions & 0 deletions src/tools/clippy/tests/ui/rename.fixed
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#![allow(array_into_iter)]
#![allow(invalid_atomic_ordering)]
#![allow(invalid_value)]
#![allow(invalid_from_utf8_unchecked)]
#![allow(let_underscore_drop)]
#![allow(enum_intrinsics_non_enums)]
#![allow(non_fmt_panics)]
Expand Down Expand Up @@ -87,6 +88,7 @@
#![warn(array_into_iter)]
#![warn(invalid_atomic_ordering)]
#![warn(invalid_value)]
#![warn(invalid_from_utf8_unchecked)]
#![warn(let_underscore_drop)]
#![warn(enum_intrinsics_non_enums)]
#![warn(non_fmt_panics)]
Expand Down
Loading

0 comments on commit e18b026

Please sign in to comment.