From de646e10db2141fc9fffddf103611fe7d1fd9d68 Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Sat, 2 Jul 2022 22:24:26 -0400 Subject: [PATCH] Add `invalid_utf8_in_unchecked` --- CHANGELOG.md | 1 + clippy_lints/src/invalid_utf8_in_unchecked.rs | 74 +++++++++++++++++++ clippy_lints/src/lib.register_all.rs | 1 + clippy_lints/src/lib.register_correctness.rs | 1 + clippy_lints/src/lib.register_lints.rs | 1 + clippy_lints/src/lib.rs | 2 + clippy_utils/src/paths.rs | 1 + tests/ui/invalid_utf8_in_unchecked.rs | 20 +++++ tests/ui/invalid_utf8_in_unchecked.stderr | 22 ++++++ 9 files changed, 123 insertions(+) create mode 100644 clippy_lints/src/invalid_utf8_in_unchecked.rs create mode 100644 tests/ui/invalid_utf8_in_unchecked.rs create mode 100644 tests/ui/invalid_utf8_in_unchecked.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 71e498c301b2..1b792736a6ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3641,6 +3641,7 @@ Released 2018-09-13 [`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref [`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex [`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons +[`invalid_utf8_in_unchecked`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_utf8_in_unchecked [`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters [`is_digit_ascii_radix`]: https://rust-lang.github.io/rust-clippy/master/index.html#is_digit_ascii_radix [`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements diff --git a/clippy_lints/src/invalid_utf8_in_unchecked.rs b/clippy_lints/src/invalid_utf8_in_unchecked.rs new file mode 100644 index 000000000000..e0a607f9a95b --- /dev/null +++ b/clippy_lints/src/invalid_utf8_in_unchecked.rs @@ -0,0 +1,74 @@ +use 
clippy_utils::diagnostics::span_lint; +use clippy_utils::{match_function_call, paths}; +use rustc_ast::{BorrowKind, LitKind}; +use rustc_hir::{Expr, ExprKind}; +use rustc_lint::{LateContext, LateLintPass}; +use rustc_session::{declare_lint_pass, declare_tool_lint}; +use rustc_span::source_map::Spanned; +use rustc_span::Span; + +declare_clippy_lint! { + /// ### What it does + /// Checks for `std::str::from_utf8_unchecked` with an invalid UTF-8 literal + /// + /// ### Why is this bad? + /// Creating such a `str` would result in undefined behavior + /// + /// ### Example + /// ```rust + /// # #[allow(unused)] + /// unsafe { + /// std::str::from_utf8_unchecked(b"cl\x82ippy"); + /// } + /// ``` + #[clippy::version = "1.64.0"] + pub INVALID_UTF8_IN_UNCHECKED, + correctness, + "using a non UTF-8 literal in `std::str::from_utf8_unchecked`" +} +declare_lint_pass!(InvalidUtf8InUnchecked => [INVALID_UTF8_IN_UNCHECKED]); + +impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked { + fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { + if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) { + match &arg.kind { + ExprKind::Lit(Spanned { node: lit, .. }) => { + if let LitKind::ByteStr(bytes) = &lit + && std::str::from_utf8(bytes).is_err() + { + lint(cx, expr.span); + } + }, + ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => { + let elements = args.iter().map(|e|{ + match &e.kind { + ExprKind::Lit(Spanned { node: lit, .. 
}) => match lit { + LitKind::Byte(b) => Some(*b), + #[allow(clippy::cast_possible_truncation)] + LitKind::Int(b, _) => Some(*b as u8), + _ => None + } + _ => None + } + }).collect::<Option<Vec<_>>>(); + + if let Some(elements) = elements + && std::str::from_utf8(&elements).is_err() + { + lint(cx, expr.span); + } + } + _ => {} + } + } + } +} + +fn lint(cx: &LateContext<'_>, span: Span) { + span_lint( + cx, + INVALID_UTF8_IN_UNCHECKED, + span, + "non UTF-8 literal in `std::str::from_utf8_unchecked`", + ); +} diff --git a/clippy_lints/src/lib.register_all.rs b/clippy_lints/src/lib.register_all.rs index 563ad891603a..da26a3f01301 100644 --- a/clippy_lints/src/lib.register_all.rs +++ b/clippy_lints/src/lib.register_all.rs @@ -92,6 +92,7 @@ store.register_group(true, "clippy::all", Some("clippy_all"), vec![ LintId::of(init_numbered_fields::INIT_NUMBERED_FIELDS), LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY), LintId::of(int_plus_one::INT_PLUS_ONE), + LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED), LintId::of(large_const_arrays::LARGE_CONST_ARRAYS), LintId::of(large_enum_variant::LARGE_ENUM_VARIANT), LintId::of(len_zero::COMPARISON_TO_EMPTY), diff --git a/clippy_lints/src/lib.register_correctness.rs b/clippy_lints/src/lib.register_correctness.rs index 7d5e65cb27a1..9975859c54fe 100644 --- a/clippy_lints/src/lib.register_correctness.rs +++ b/clippy_lints/src/lib.register_correctness.rs @@ -29,6 +29,7 @@ store.register_group(true, "clippy::correctness", Some("clippy_correctness"), ve LintId::of(infinite_iter::INFINITE_ITER), LintId::of(inherent_to_string::INHERENT_TO_STRING_SHADOW_DISPLAY), LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY), + LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED), LintId::of(let_underscore::LET_UNDERSCORE_LOCK), LintId::of(literal_representation::MISTYPED_LITERAL_SUFFIXES), LintId::of(loops::ITER_NEXT_LOOP), diff --git a/clippy_lints/src/lib.register_lints.rs b/clippy_lints/src/lib.register_lints.rs index 
d3c75f8b5191..ceb8470657f7 100644 --- a/clippy_lints/src/lib.register_lints.rs +++ b/clippy_lints/src/lib.register_lints.rs @@ -196,6 +196,7 @@ store.register_lints(&[ inline_fn_without_body::INLINE_FN_WITHOUT_BODY, int_plus_one::INT_PLUS_ONE, invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS, + invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED, items_after_statements::ITEMS_AFTER_STATEMENTS, iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR, large_const_arrays::LARGE_CONST_ARRAYS, diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 172fdf8c8526..1604d1078eef 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -255,6 +255,7 @@ mod init_numbered_fields; mod inline_fn_without_body; mod int_plus_one; mod invalid_upcast_comparisons; +mod invalid_utf8_in_unchecked; mod items_after_statements; mod iter_not_returning_iterator; mod large_const_arrays; @@ -913,6 +914,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(move || Box::new(manual_retain::ManualRetain::new(msrv))); let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold; store.register_late_pass(move || Box::new(operators::Operators::new(verbose_bit_mask_threshold))); + store.register_late_pass(|| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked)); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/clippy_utils/src/paths.rs b/clippy_utils/src/paths.rs index 6542e77113b4..05429d05d9eb 100644 --- a/clippy_utils/src/paths.rs +++ b/clippy_utils/src/paths.rs @@ -163,6 +163,7 @@ pub const STR_BYTES: [&str; 4] = ["core", "str", "", "bytes"]; pub const STR_CHARS: [&str; 4] = ["core", "str", "", "chars"]; pub const STR_ENDS_WITH: [&str; 4] = ["core", "str", "", "ends_with"]; pub const STR_FROM_UTF8: [&str; 4] = ["core", "str", "converts", "from_utf8"]; +pub const STR_FROM_UTF8_UNCHECKED: [&str; 4] = ["core", "str", "converts", "from_utf8_unchecked"]; pub const 
STR_LEN: [&str; 4] = ["core", "str", "", "len"]; pub const STR_STARTS_WITH: [&str; 4] = ["core", "str", "", "starts_with"]; #[cfg(feature = "internal")] diff --git a/tests/ui/invalid_utf8_in_unchecked.rs b/tests/ui/invalid_utf8_in_unchecked.rs new file mode 100644 index 000000000000..3dc096d3197f --- /dev/null +++ b/tests/ui/invalid_utf8_in_unchecked.rs @@ -0,0 +1,20 @@ +#![warn(clippy::invalid_utf8_in_unchecked)] + +fn main() { + // Valid + unsafe { + std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]); + std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']); + std::str::from_utf8_unchecked(b"clippy"); + + let x = 0xA0; + std::str::from_utf8_unchecked(&[0xC0, x]); + } + + // Invalid + unsafe { + std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); + std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + std::str::from_utf8_unchecked(b"cl\x82ippy"); + } +} diff --git a/tests/ui/invalid_utf8_in_unchecked.stderr b/tests/ui/invalid_utf8_in_unchecked.stderr new file mode 100644 index 000000000000..c89cd2758ee9 --- /dev/null +++ b/tests/ui/invalid_utf8_in_unchecked.stderr @@ -0,0 +1,22 @@ +error: non UTF-8 literal in `std::str::from_utf8_unchecked` + --> $DIR/invalid_utf8_in_unchecked.rs:16:9 + | +LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `-D clippy::invalid-utf8-in-unchecked` implied by `-D warnings` + +error: non UTF-8 literal in `std::str::from_utf8_unchecked` + --> $DIR/invalid_utf8_in_unchecked.rs:17:9 + | +LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'/x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: non UTF-8 literal in `std::str::from_utf8_unchecked` + --> $DIR/invalid_utf8_in_unchecked.rs:18:9 + | +LL | std::str::from_utf8_unchecked(b"cl/x82ippy"); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 
+error: aborting due to 3 previous errors +