Skip to content

Commit

Permalink
Auto merge of #9105 - Serial-ATA:lint-invalid-utf8, r=Jarcho
Browse files Browse the repository at this point in the history
Add `invalid_utf8_in_unchecked`

changelog: Add [`invalid_utf8_in_unchecked`]
closes: #629

I don't know how useful this lint is; I just saw that this was a really old issue 😄.
  • Loading branch information
bors committed Jul 3, 2022
2 parents be9e35f + de646e1 commit 8c89877
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3641,6 +3641,7 @@ Released 2018-09-13
[`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref
[`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex
[`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons
[`invalid_utf8_in_unchecked`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_utf8_in_unchecked
[`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters
[`is_digit_ascii_radix`]: https://rust-lang.github.io/rust-clippy/master/index.html#is_digit_ascii_radix
[`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements
Expand Down
74 changes: 74 additions & 0 deletions clippy_lints/src/invalid_utf8_in_unchecked.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use clippy_utils::diagnostics::span_lint;
use clippy_utils::{match_function_call, paths};
use rustc_ast::{BorrowKind, LitKind};
use rustc_hir::{Expr, ExprKind};
use rustc_lint::{LateContext, LateLintPass};
use rustc_session::{declare_lint_pass, declare_tool_lint};
use rustc_span::source_map::Spanned;
use rustc_span::Span;

declare_clippy_lint! {
    /// ### What it does
    /// Checks for calls to `std::str::from_utf8_unchecked` whose argument is a byte string
    /// literal, or a borrowed array of byte/integer literals, that is not valid UTF-8.
    ///
    /// ### Why is this bad?
    /// Creating such a `str` would result in undefined behavior.
    ///
    /// ### Example
    /// ```rust
    /// # #[allow(unused)]
    /// unsafe {
    ///     std::str::from_utf8_unchecked(b"cl\x82ippy");
    /// }
    /// ```
    #[clippy::version = "1.64.0"]
    pub INVALID_UTF8_IN_UNCHECKED,
    correctness,
    "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}
// Declares the `InvalidUtf8InUnchecked` pass and associates it with the `INVALID_UTF8_IN_UNCHECKED` lint.
declare_lint_pass!(InvalidUtf8InUnchecked => [INVALID_UTF8_IN_UNCHECKED]);

impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked {
    /// Lints calls to `from_utf8_unchecked` whose single argument is a literal that can be
    /// checked here and turns out not to be valid UTF-8.
    ///
    /// Two argument shapes are inspected:
    /// - a byte string literal (`b"..."`), and
    /// - a borrowed array (`&[...]`) in which every element is a byte or integer literal.
    ///
    /// Any other argument shape, or an array containing a non-literal element, is ignored.
    fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
        // Only calls to `from_utf8_unchecked` with exactly one argument are of interest.
        let arg = match match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) {
            Some([arg]) => arg,
            _ => return,
        };

        let is_invalid = match &arg.kind {
            ExprKind::Lit(Spanned {
                node: LitKind::ByteStr(bytes),
                ..
            }) => std::str::from_utf8(bytes).is_err(),
            ExprKind::AddrOf(
                BorrowKind::Ref,
                _,
                Expr {
                    kind: ExprKind::Array(items),
                    ..
                },
            ) => {
                // Collecting into `Option` yields `None` as soon as one element is not a
                // byte/integer literal, in which case the array is not checked at all.
                let literal_bytes = items
                    .iter()
                    .map(|item| match &item.kind {
                        ExprKind::Lit(Spanned {
                            node: LitKind::Byte(byte),
                            ..
                        }) => Some(*byte),
                        #[allow(clippy::cast_possible_truncation)]
                        ExprKind::Lit(Spanned {
                            node: LitKind::Int(value, _),
                            ..
                        }) => Some(*value as u8),
                        _ => None,
                    })
                    .collect::<Option<Vec<_>>>();

                literal_bytes.map_or(false, |bytes| std::str::from_utf8(&bytes).is_err())
            },
            _ => false,
        };

        if is_invalid {
            lint(cx, expr.span);
        }
    }
}

/// Emits the `INVALID_UTF8_IN_UNCHECKED` lint on `span` with a fixed message.
fn lint(cx: &LateContext<'_>, span: Span) {
    let message = "non UTF-8 literal in `std::str::from_utf8_unchecked`";
    span_lint(cx, INVALID_UTF8_IN_UNCHECKED, span, message);
}
1 change: 1 addition & 0 deletions clippy_lints/src/lib.register_all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ store.register_group(true, "clippy::all", Some("clippy_all"), vec![
LintId::of(init_numbered_fields::INIT_NUMBERED_FIELDS),
LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY),
LintId::of(int_plus_one::INT_PLUS_ONE),
LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED),
LintId::of(large_const_arrays::LARGE_CONST_ARRAYS),
LintId::of(large_enum_variant::LARGE_ENUM_VARIANT),
LintId::of(len_zero::COMPARISON_TO_EMPTY),
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/lib.register_correctness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ store.register_group(true, "clippy::correctness", Some("clippy_correctness"), ve
LintId::of(infinite_iter::INFINITE_ITER),
LintId::of(inherent_to_string::INHERENT_TO_STRING_SHADOW_DISPLAY),
LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY),
LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED),
LintId::of(let_underscore::LET_UNDERSCORE_LOCK),
LintId::of(literal_representation::MISTYPED_LITERAL_SUFFIXES),
LintId::of(loops::ITER_NEXT_LOOP),
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/lib.register_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ store.register_lints(&[
inline_fn_without_body::INLINE_FN_WITHOUT_BODY,
int_plus_one::INT_PLUS_ONE,
invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS,
invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED,
items_after_statements::ITEMS_AFTER_STATEMENTS,
iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR,
large_const_arrays::LARGE_CONST_ARRAYS,
Expand Down
2 changes: 2 additions & 0 deletions clippy_lints/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ mod init_numbered_fields;
mod inline_fn_without_body;
mod int_plus_one;
mod invalid_upcast_comparisons;
mod invalid_utf8_in_unchecked;
mod items_after_statements;
mod iter_not_returning_iterator;
mod large_const_arrays;
Expand Down Expand Up @@ -913,6 +914,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
store.register_late_pass(move || Box::new(manual_retain::ManualRetain::new(msrv)));
let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold;
store.register_late_pass(move || Box::new(operators::Operators::new(verbose_bit_mask_threshold)));
store.register_late_pass(|| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked));
// add lints here, do not remove this comment, it's used in `new_lint`
}

Expand Down
1 change: 1 addition & 0 deletions clippy_utils/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ pub const STR_BYTES: [&str; 4] = ["core", "str", "<impl str>", "bytes"];
pub const STR_CHARS: [&str; 4] = ["core", "str", "<impl str>", "chars"];
pub const STR_ENDS_WITH: [&str; 4] = ["core", "str", "<impl str>", "ends_with"];
pub const STR_FROM_UTF8: [&str; 4] = ["core", "str", "converts", "from_utf8"];
pub const STR_FROM_UTF8_UNCHECKED: [&str; 4] = ["core", "str", "converts", "from_utf8_unchecked"];
pub const STR_LEN: [&str; 4] = ["core", "str", "<impl str>", "len"];
pub const STR_STARTS_WITH: [&str; 4] = ["core", "str", "<impl str>", "starts_with"];
#[cfg(feature = "internal")]
Expand Down
20 changes: 20 additions & 0 deletions tests/ui/invalid_utf8_in_unchecked.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#![warn(clippy::invalid_utf8_in_unchecked)]

fn main() {
// Valid UTF-8 literals, plus an array with a non-literal element that the lint does not inspect: no lint expected
unsafe {
std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]);
std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']);
std::str::from_utf8_unchecked(b"clippy");

let x = 0xA0;
std::str::from_utf8_unchecked(&[0xC0, x]);
}

// Invalid UTF-8 literals (130 / 0x82 is a stray continuation byte): every call below should be linted
unsafe {
std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
std::str::from_utf8_unchecked(b"cl\x82ippy");
}
}
22 changes: 22 additions & 0 deletions tests/ui/invalid_utf8_in_unchecked.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
error: non UTF-8 literal in `std::str::from_utf8_unchecked`
--> $DIR/invalid_utf8_in_unchecked.rs:16:9
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: `-D clippy::invalid-utf8-in-unchecked` implied by `-D warnings`

error: non UTF-8 literal in `std::str::from_utf8_unchecked`
--> $DIR/invalid_utf8_in_unchecked.rs:17:9
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'/x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

error: non UTF-8 literal in `std::str::from_utf8_unchecked`
--> $DIR/invalid_utf8_in_unchecked.rs:18:9
|
LL | std::str::from_utf8_unchecked(b"cl/x82ippy");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

error: aborting due to 3 previous errors

0 comments on commit 8c89877

Please sign in to comment.