-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DSE] Update dereferenceable attributes when adjusting memintrinsic ptr #125073
Conversation
Consider IR like this call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p, i8 0, i64 28, i1 false) store i32 1, ptr %p It has been optimized like this: %p2 = getelementptr inbounds i8, ptr %p, i64 4 call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p2, i8 0, i64 24, i1 false) store i32 1, ptr %p As the input IR doesn't guarantee that it is OK to deref 28 bytes starting at the adjusted pointer %p2 the transformation has been a bit flawed. With this patch we make sure to also adjust the size of any dereferenceable/dereferenceable_or_null attributes when doing the transform in tryToShorten (when adjusting the start pointer). So now we will get dereferenceable(24) in the example above.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-ir Author: Björn Pettersson (bjope) ChangesConsider IR like this It has been optimized like this: As the input IR doesn't guarantee that it is OK to deref 28 bytes starting at the adjusted pointer %p2 the transformation has been a bit flawed. With this patch we make sure to also adjust the size of any dereferenceable/dereferenceable_or_null attributes when doing the transform in tryToShorten (when adjusting the start pointer). So now we will get dereferenceable(24) in the example above. Full diff: https://github.com/llvm/llvm-project/pull/125073.diff 3 Files Affected:
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 47ddc7555594c5..f26aafbe3f0ef8 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1559,6 +1559,11 @@ class CallBase : public Instruction {
Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes);
}
+ /// adds the dereferenceable attribute to the list of attributes.
+ void addDereferenceableOrNullParamAttr(unsigned i, uint64_t Bytes) {
+ Attrs = Attrs.addDereferenceableOrNullParamAttr(getContext(), i, Bytes);
+ }
+
/// adds the dereferenceable attribute to the list of attributes.
void addDereferenceableRetAttr(uint64_t Bytes) {
Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes);
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 13f3de07c3c44d..c16134e3f01598 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -563,6 +563,23 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}
+// Helper to trim or drop any dereferencable/dereferencable_or_null attributes
+// for a given argument, based on the new access being restricted to derefence
+// bytes in the range [Offset, Offset+Size).
+static void trimDereferencableAttrs(AnyMemIntrinsic *Intrinsic, unsigned Arg,
+ uint64_t Offset, uint64_t Size) {
+ uint64_t End = Offset + Size;
+ if (Intrinsic->getParamDereferenceableBytes(Arg) >= End)
+ Intrinsic->addDereferenceableParamAttr(Arg, Size);
+ else
+ Intrinsic->removeParamAttr(Arg, Attribute::Dereferenceable);
+ if (Intrinsic->getParamDereferenceableOrNullBytes(Arg) >= End)
+ Intrinsic->addDereferenceableOrNullParamAttr(Arg, Size);
+ else
+ Intrinsic->removeParamAttr(Arg, Attribute::DereferenceableOrNull);
+}
+
+
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
uint64_t &DeadSize, int64_t KillingStart,
uint64_t KillingSize, bool IsOverwriteEnd) {
@@ -644,6 +661,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
DeadI->getIterator());
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
DeadIntrinsic->setDest(NewDestGEP);
+ trimDereferencableAttrs(DeadIntrinsic, 0, ToRemoveSize, NewSize);
}
// Update attached dbg.assign intrinsics. Assume 8-bit byte.
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
index bc1756f6ca9d1b..6c42d8e2c98338 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -402,3 +402,33 @@ entry:
store i64 1, ptr %p, align 1
ret void
}
+
+; Verify that we adjust the dereferenceable attribute.
+define void @dereferenceable(ptr nocapture %p) {
+; CHECK-LABEL: @dereferenceable(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable(24) [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: store i32 1, ptr [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.memset.p0.i64(ptr dereferenceable(28) align 4 %p, i8 0, i64 28, i1 false)
+ store i32 1, ptr %p, align 4
+ ret void
+}
+
+; Verify that we adjust the dereferenceable_or_null attribute.
+define void @dereferenceable_or_null(ptr nocapture %p) {
+; CHECK-LABEL: @dereferenceable_or_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable_or_null(20) [[TMP0]], i8 0, i64 20, i1 false)
+; CHECK-NEXT: store i64 1, ptr [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.memset.p0.i64(ptr dereferenceable_or_null(28) align 4 %p, i8 0, i64 28, i1 false)
+ store i64 1, ptr %p, align 4
+ ret void
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
uint64_t Offset, uint64_t Size) { | ||
uint64_t End = Offset + Size; | ||
if (Intrinsic->getParamDereferenceableBytes(Arg) >= End) | ||
Intrinsic->addDereferenceableParamAttr(Arg, Size); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you're going to actually adjust the attributes, I'd expect to see something like "OldDerefenceableBytes - Offset" here, so that information about larger dereferenceability than the memset length is preserved.
But really, I'd consider to just completely drop the attributes in this transform. memset with a constant length already implies dereferenceability by itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, thanks. I wasn't quite sure if dropping the attributes could have some downsides, but just dropping them is simpler so I like that solution. So I've updated the patch to just drop the attributes instead of trying to adjust them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably we should drop all attributes here, so we don't need to adjust this whenever we add a new attribute to LLVM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropping all attributes would also lose alignment, which is correctly adjusted and might not be recoverable...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see some alternatives:
- Instead of having an explicit set of attributes to drop we could use prepare a set of attributes that is safe to keep. And then use that for the filtering. That would more future proof in case new attributes are added.
- Another idea is to add some kind of support in Attributes.h (or similar). Such as a more centralized helper method to "filter attributes based on access being narrowed" (and then it might be easier to find and update that when adding new attributes). This could also just be an AttributeMask similar to getUBImplyingAttributes().
when there is a risk that they would become incorrect if neither adjusting nor dropping them.
Does it fix #115976? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This introduces one regression: https://llvm.godbolt.org/z/1EjrE9xM6
It's due to this check:
if (DefInst->isIdenticalTo(UpperInst)) |
We should be doing the check in IntersectAttrs mode.
Regression should be fixed by #125190. |
Yes it should. I've added as "Fixes" in the description. |
handle. That should be more future proof as we automatically would drop any new attributes being added.
Ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
…tr (llvm#125073) Consider IR like this call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p, i8 0, i64 28, i1 false) store i32 1, ptr %p In the past it has been optimized like this: %p2 = getelementptr inbounds i8, ptr %p, i64 4 call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p2, i8 0, i64 24, i1 false) store i32 1, ptr %p As the input IR doesn't guarantee that it is OK to deref 28 bytes starting at the adjusted pointer %p2 the transformation has been a bit flawed. With this patch we make sure to drop any dereferenceable/dereferenceable_or_null attributes when doing such transforms. An alternative would have been to adjust the amount of dereferenceable bytes, but since a memset with a constant length already implies dereferenceability by itself it is simpler to just drop the attributes. The new filtering of attributes is done using a helper that only keep attributes that we explicitly handle. For the adjusted mem instrinsic pointers that currently involve "NonNull", "NoUndef" and "Alignment" (when the alignment is known to be fulfilled also after offsetting the pointer). Fixes llvm#115976
Consider IR like this
call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p, i8 0, i64 28, i1 false)
store i32 1, ptr %p
In the past it has been optimized like this:
%p2 = getelementptr inbounds i8, ptr %p, i64 4
call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p2, i8 0, i64 24, i1 false)
store i32 1, ptr %p
As the input IR doesn't guarantee that it is OK to deref 28 bytes starting at the adjusted pointer %p2 the transformation has been a bit flawed.
With this patch we make sure to drop any dereferenceable/dereferenceable_or_null attributes when doing such transforms. An alternative would have been to adjust the amount of dereferenceable bytes, but since a memset with a constant length already implies dereferenceability by itself it is simpler to just drop the attributes.
The new filtering of attributes is done using a helper that only keep attributes that we explicitly handle. For the adjusted mem instrinsic pointers that currently involve "NonNull", "NoUndef" and "Alignment" (when the alignment is known to be fulfilled also after offsetting the pointer).
Fixes #115976