-
Notifications
You must be signed in to change notification settings - Fork 11.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] Warn about memset/memcpy to NonTriviallyCopyable types #111434
base: main
Are you sure you want to change the base?
[clang] Warn about memset/memcpy to NonTriviallyCopyable types #111434
Conversation
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-clang Author: None (serge-sans-paille) ChangesThis implements a warning that's similar to what GCC does in that context: both memcpy and memset require their first and second operand to be trivially copyable, let's warn if that's not the case. Full diff: https://github.com/llvm/llvm-project/pull/111434.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 583475327c5227..d9bff4a559b3b7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -790,6 +790,10 @@ def warn_cstruct_memaccess : Warning<
"%1 call is a pointer to record %2 that is not trivial to "
"%select{primitive-default-initialize|primitive-copy}3">,
InGroup<NonTrivialMemaccess>;
+def warn_cxxstruct_memaccess : Warning<
+ "%select{destination for|source of|first operand of|second operand of}0 this "
+ "%1 call is a pointer to record %2 that is not trivially-copyable">,
+ InGroup<NonTrivialMemaccess>;
def note_nontrivial_field : Note<
"field is non-trivial to %select{copy|default-initialize}0">;
def err_non_trivial_c_union_in_invalid_context : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2bcb930acdcb57..46dda34d0ac8f3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -8899,18 +8899,42 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call,
<< ArgIdx << FnName << PointeeTy
<< Call->getCallee()->getSourceRange());
else if (const auto *RT = PointeeTy->getAs<RecordType>()) {
+
+ auto IsTriviallyCopyableCXXRecord = [](auto const *RT) {
+ auto const *D = RT->getDecl();
+ if (!D)
+ return true;
+ auto const *RD = dyn_cast<CXXRecordDecl>(D);
+ if (!RD)
+ return true;
+ RD = RD->getDefinition();
+ if (!RD)
+ return true;
+ return RD->isTriviallyCopyable();
+ };
+
if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) &&
RT->getDecl()->isNonTrivialToPrimitiveDefaultInitialize()) {
DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
PDiag(diag::warn_cstruct_memaccess)
<< ArgIdx << FnName << PointeeTy << 0);
SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this);
+ } else if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) &&
+ !IsTriviallyCopyableCXXRecord(RT)) {
+ DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
+ PDiag(diag::warn_cxxstruct_memaccess)
+ << ArgIdx << FnName << PointeeTy);
} else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) &&
RT->getDecl()->isNonTrivialToPrimitiveCopy()) {
DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
PDiag(diag::warn_cstruct_memaccess)
<< ArgIdx << FnName << PointeeTy << 1);
SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this);
+ } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) &&
+ !IsTriviallyCopyableCXXRecord(RT)) {
+ DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
+ PDiag(diag::warn_cxxstruct_memaccess)
+ << ArgIdx << FnName << PointeeTy);
} else {
continue;
}
diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp
index c456740ef7551f..26e2e138ef34e0 100644
--- a/clang/test/SemaCXX/constexpr-string.cpp
+++ b/clang/test/SemaCXX/constexpr-string.cpp
@@ -603,12 +603,16 @@ namespace MemcpyEtc {
};
constexpr bool test_nontrivial_memcpy() { // expected-error {{never produces a constant}}
NonTrivial arr[3] = {};
+ // expected-warning@+2 {{source of this '__builtin_memcpy' call is a pointer to record 'NonTrivial' that is not trivially-copyable}}
+ // expected-note@+1 {{explicitly cast the pointer to silence this warning}}
__builtin_memcpy(arr, arr + 1, sizeof(NonTrivial)); // expected-note 2{{non-trivially-copyable}}
return true;
}
static_assert(test_nontrivial_memcpy()); // expected-error {{constant}} expected-note {{in call}}
constexpr bool test_nontrivial_memmove() { // expected-error {{never produces a constant}}
NonTrivial arr[3] = {};
+ // expected-warning@+2 {{source of this '__builtin_memcpy' call is a pointer to record 'NonTrivial' that is not trivially-copyable}}
+ // expected-note@+1 {{explicitly cast the pointer to silence this warning}}
__builtin_memcpy(arr, arr + 1, sizeof(NonTrivial)); // expected-note 2{{non-trivially-copyable}}
return true;
}
|
This change is triggering warnings at several point in the code base, I'll fix that too. |
|
3f44807
to
bc3b844
Compare
cc @ldionne got the compiler-rt part. |
Some of the libcxx builds fail, they are marked as "build stopped: interrupted by user."... I can't find the origin |
@@ -102,7 +102,7 @@ struct __aliasing_iterator_wrapper { | |||
|
|||
_LIBCPP_HIDE_FROM_ABI _Alias operator*() const _NOEXCEPT { | |||
_Alias __val; | |||
__builtin_memcpy(&__val, std::__to_address(__base_), sizeof(value_type)); | |||
__builtin_memcpy(&__val, reinterpret_cast<void*>(std::__to_address(__base_)), sizeof(value_type)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO this is a false-positive. This inspects the non-trivial object and implicitly starts the lifetime of _Alias
, which is perfectly defined behaviour.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The cast should probably be static_cast<void*>
also
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ack for the false positive. I'll move that to a static_cast which should silent the warning.
Please note: GCC is more strict; for std::memset, it requires the type to be trivial, not just trivially copyable: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107361 I want to remember it was also more strict for std::memcpy, but I haven't been able to reproduce it. I haven't looked at the patch in detail but for consistency with GCC it should also support suppressing the warning by explictly casting the pointers to void* (maybe it's already in place). |
bc3b844
to
c9a013f
Compare
ACK. Not a standard requirement though, is it?
Yeah, that's part of current logic clang in clang too (thus the various cast added in this PR) |
Correct, it's only UB on non-trivially-copyable. My point was more about if we want to be consistent with GCC or not. I don't have any strong opinion on that. |
c9a013f
to
92f14e6
Compare
I can see an argument that a user probably is doing something wrong if they |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for this! It's missing a lot of test coverage that should be added to clang/test/SemaCXX and you should also add a release note to clang/docs/ReleaseNotes.rst so users know about the new diagnostic.
"%select{destination for|source of|first operand of|second operand of}0 this " | ||
"%1 call is a pointer to record %2 that is not trivially-copyable">, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"%select{destination for|source of|first operand of|second operand of}0 this " | |
"%1 call is a pointer to record %2 that is not trivially-copyable">, | |
"%select{destination for|source of|first operand of|second operand of}0 call to " | |
"%1 is a pointer to non-trivially copyable type %2">, |
Slight rewording
clang/lib/Sema/SemaChecking.cpp
Outdated
@@ -8899,18 +8899,42 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, | |||
<< ArgIdx << FnName << PointeeTy | |||
<< Call->getCallee()->getSourceRange()); | |||
else if (const auto *RT = PointeeTy->getAs<RecordType>()) { | |||
|
|||
auto IsTriviallyCopyableCXXRecord = [](auto const *RT) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto IsTriviallyCopyableCXXRecord = [](auto const *RT) { | |
auto IsTriviallyCopyableCXXRecord = [](const RecordType *RT) { |
Do not use auto
when there's only one type this can be called with (I just spent a lot of time writing comments that I had to delete because this can only accept a RecordType
).
Actually, I don't think we even need the lambda. You could precalculate this as a local variable:
bool IsTriviallyCopyableCXXRecord = false;
if (const auto *RD = RT->getAsCXXRecordDecl())
IsTriviallyCopyableCXXRecord = RD->isTriviallyCopyable();
should suffice. (The call should return false
if the record has no definition because that's an incomplete type but that's a good test case to ensure we have.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, @erichkeane just reminded me that we have QualType::isTriviallyCopyableType()
-- that's an even better interface to use IMO.
clang/lib/Sema/SemaChecking.cpp
Outdated
@@ -8899,18 +8899,42 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, | |||
<< ArgIdx << FnName << PointeeTy | |||
<< Call->getCallee()->getSourceRange()); | |||
else if (const auto *RT = PointeeTy->getAs<RecordType>()) { | |||
|
|||
auto IsTriviallyCopyableCXXRecord = [](auto const *RT) { | |||
auto const *D = RT->getDecl(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto const *D = RT->getDecl(); | |
const Decl *D = RT->getDecl(); |
Please only use auto
when the type is spelled out in the initialization. Also, we use const Type
and not Type const
as the prevailing style, so you should stick with that.
clang/lib/Sema/SemaChecking.cpp
Outdated
auto const *D = RT->getDecl(); | ||
if (!D) | ||
return true; | ||
auto const *RD = dyn_cast<CXXRecordDecl>(D); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto const *RD = dyn_cast<CXXRecordDecl>(D); | |
const auto *RD = dyn_cast<CXXRecordDecl>(D); |
@@ -102,7 +102,7 @@ struct __aliasing_iterator_wrapper { | |||
|
|||
_LIBCPP_HIDE_FROM_ABI _Alias operator*() const _NOEXCEPT { | |||
_Alias __val; | |||
__builtin_memcpy(&__val, std::__to_address(__base_), sizeof(value_type)); | |||
__builtin_memcpy(&__val, static_cast<const void*>(std::__to_address(__base_)), sizeof(value_type)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to make it visible again: IMO this is a false-positive, since this is inspecting a non-trivial type, which is perfectly defined behaviour.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's one thing I don't understand: my understanding is that passing a pointer to a non-trivially copyable type to memcpy first or second argument is UB and _BaseIter
maybe a non-trivially copyable type, so what happens then?
92f14e6
to
9cd993d
Compare
This implements a warning that's similar to what GCC does in that context: both memcpy and memset require their first and second operand to be trivially copyable, let's warn if that's not the case.
9cd993d
to
67de33b
Compare
67de33b
to
31be3da
Compare
@AaronBallman doc, tests and style updated. Thanks for the review! |
This implements a warning that's similar to what GCC does in that context: both memcpy and memset require their first and second operand to be trivially copyable, let's warn if that's not the case.