-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Inliner] Propagate more attributes to params when inlining #91101
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-ir Author: None (goldsteinn) Changes
Patch is 21.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91101.diff 10 Files Affected:
diff --git a/clang/test/CodeGen/attr-counted-by-pr88931.cpp b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
index 2a8cc1d07e50d9..6d0c46bbbe8f9c 100644
--- a/clang/test/CodeGen/attr-counted-by-pr88931.cpp
+++ b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
@@ -13,7 +13,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0))));
// CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev(
// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: entry:
-// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull align 4 dereferenceable(1) [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
foo::bar::bar() {
diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp
index e1f646e2b141a0..c4e309d7f566b5 100644
--- a/clang/test/OpenMP/bug57757.cpp
+++ b/clang/test/OpenMP/bug57757.cpp
@@ -39,7 +39,7 @@ void foo() {
// CHECK-NEXT: ]
// CHECK: .untied.jmp..i:
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]]
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias [[META13]]
+// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]]
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK: .untied.next..i:
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index dd11955714895e..337254906db885 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -752,6 +752,11 @@ class AttributeList {
[[nodiscard]] AttributeList addRangeRetAttr(LLVMContext &C,
const ConstantRange &CR) const;
+ /// Add the range attribute to the attribute set at the given arg index.
+ /// Returns a new list because attribute lists are immutable.
+ [[nodiscard]] AttributeList addRangeParamAttr(LLVMContext &C, unsigned Index,
+ const ConstantRange &CR) const;
+
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
[[nodiscard]] AttributeList
@@ -906,6 +911,9 @@ class AttributeList {
/// arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
+ /// Get range (or std::nullopt if unknown) of an arg.
+ std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;
+
/// Get the disallowed floating-point classes of the return value.
FPClassTest getRetNoFPClass() const;
@@ -1082,6 +1090,10 @@ class AttrBuilder {
/// invalid if the Kind is not present in the builder.
Attribute getAttribute(StringRef Kind) const;
+ /// Retrieve the range if the attribute exists (std::nullopt is returned
+ /// otherwise).
+ std::optional<ConstantRange> getRange() const;
+
/// Return raw (possibly packed/encoded) value of integer attribute or
/// std::nullopt if not set.
std::optional<uint64_t> getRawIntAttr(Attribute::AttrKind Kind) const;
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index b9af3a6ca42c06..87335f0b28c6b4 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -2198,6 +2198,10 @@ class CallBase : public Instruction {
/// parameter.
FPClassTest getParamNoFPClass(unsigned i) const;
+ /// If arg ArgNo has a range attribute, return the value range of the
+ /// argument. Otherwise, std::nullopt is returned.
+ std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;
+
/// If this return value has a range attribute, return the value range of the
/// argument. Otherwise, std::nullopt is returned.
std::optional<ConstantRange> getRange() const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index c8d6bdd423878b..0cbfe923032c86 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1530,6 +1530,13 @@ AttributeList::addDereferenceableOrNullParamAttr(LLVMContext &C, unsigned Index,
return addParamAttributes(C, Index, B);
}
+AttributeList AttributeList::addRangeParamAttr(LLVMContext &C, unsigned Index,
+ const ConstantRange &CR) const {
+ AttrBuilder B(C);
+ B.addRangeAttr(CR);
+ return addParamAttributes(C, Index, B);
+}
+
AttributeList AttributeList::addRangeRetAttr(LLVMContext &C,
const ConstantRange &CR) const {
AttrBuilder B(C);
@@ -1658,6 +1665,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
return getParamAttrs(Index).getDereferenceableOrNullBytes();
}
+std::optional<ConstantRange>
+AttributeList::getParamRange(unsigned Index) const {
+ auto RangeAttr = getParamAttrs(Index).getAttribute(Attribute::Range);
+ if (RangeAttr.isValid())
+ return RangeAttr.getRange();
+ return std::nullopt;
+}
+
FPClassTest AttributeList::getRetNoFPClass() const {
return getRetAttrs().getNoFPClass();
}
@@ -1991,6 +2006,13 @@ Attribute AttrBuilder::getAttribute(StringRef A) const {
return {};
}
+std::optional<ConstantRange> AttrBuilder::getRange() const {
+ const Attribute RangeAttr = getAttribute(Attribute::Range);
+ if (RangeAttr.isValid())
+ return RangeAttr.getRange();
+ return std::nullopt;
+}
+
bool AttrBuilder::contains(Attribute::AttrKind A) const {
return getAttribute(A).isValid();
}
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7ad1ad4cddb703..ee832d2093a132 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -396,6 +396,13 @@ FPClassTest CallBase::getParamNoFPClass(unsigned i) const {
return Mask;
}
+std::optional<ConstantRange> CallBase::getParamRange(unsigned ArgNo) const {
+ const Attribute RangeAttr = getParamAttr(ArgNo, llvm::Attribute::Range);
+ if (RangeAttr.isValid())
+ return RangeAttr.getRange();
+ return std::nullopt;
+}
+
std::optional<ConstantRange> CallBase::getRange() const {
const Attribute RangeAttr = getRetAttr(llvm::Attribute::Range);
if (RangeAttr.isValid())
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1aae561d8817b5..41f899fe120f63 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1352,20 +1352,43 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
auto &Context = CalledFunction->getContext();
// Collect valid attributes for all params.
- SmallVector<AttrBuilder> ValidParamAttrs;
+ SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
bool HasAttrToPropagate = false;
for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
- ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+ ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+ ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
// Access attributes can be propagated to any param with the same underlying
// object as the argument.
if (CB.paramHasAttr(I, Attribute::ReadNone))
- ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
+ ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
if (CB.paramHasAttr(I, Attribute::ReadOnly))
- ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
+ ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
if (CB.paramHasAttr(I, Attribute::WriteOnly))
- ValidParamAttrs.back().addAttribute(Attribute::WriteOnly);
- HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
+ ValidObjParamAttrs.back().addAttribute(Attribute::WriteOnly);
+
+ // Attributes we can only propagate if the exact parameter is forwarded.
+
+ // We can propagate both poison generating an UB generating attributes
+ // without any extra checks. The only attribute that is tricky to propagate
+ // is `noundef` (skipped for now) as that can create new UB where previous
+ // behavior was just using a poison value.
+ if (auto DerefBytes = CB.getParamDereferenceableBytes(I))
+ ValidExactParamAttrs.back().addDereferenceableAttr(DerefBytes);
+ if (auto DerefOrNullBytes = CB.getParamDereferenceableOrNullBytes(I))
+ ValidExactParamAttrs.back().addDereferenceableOrNullAttr(
+ DerefOrNullBytes);
+ if (CB.paramHasAttr(I, Attribute::NoFree))
+ ValidExactParamAttrs.back().addAttribute(Attribute::NoFree);
+ if (CB.paramHasAttr(I, Attribute::NonNull))
+ ValidExactParamAttrs.back().addAttribute(Attribute::NonNull);
+ if (auto Align = CB.getParamAlign(I))
+ ValidExactParamAttrs.back().addAlignmentAttr(Align);
+ if (auto Range = CB.getParamRange(I))
+ ValidExactParamAttrs.back().addRangeAttr(*Range);
+
+ HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
+ HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
}
// Won't be able to propagate anything.
@@ -1383,15 +1406,54 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
AttributeList AL = NewInnerCB->getAttributes();
for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
// Check if the underlying value for the parameter is an argument.
- const Value *UnderlyingV =
- getUnderlyingObject(InnerCB->getArgOperand(I));
- const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
- if (!Arg)
- continue;
+ const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
+ unsigned ArgNo;
+ if (Arg) {
+ ArgNo = Arg->getArgNo();
+ // For dereferenceable, dereferenceable_or_null, align, etc...
+ // we don't want to propagate if the existing param has the same
+ // attribute with "better" constraints. So, only remove from the
+ // existing AL if the region of the existing param is smaller than
+ // what we can propagate. AttributeList's merge API honours the
+ // already existing attribute value so we choose the "better"
+ // attribute by removing if the existing one is worse.
+ if (AL.getParamDereferenceableBytes(I) <
+ ValidExactParamAttrs[ArgNo].getDereferenceableBytes())
+ AL =
+ AL.removeParamAttribute(Context, I, Attribute::Dereferenceable);
+ if (AL.getParamDereferenceableOrNullBytes(I) <
+ ValidExactParamAttrs[ArgNo].getDereferenceableOrNullBytes())
+ AL =
+ AL.removeParamAttribute(Context, I, Attribute::Dereferenceable);
+ if (AL.getParamAlignment(I).valueOrOne() <
+ ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne())
+ AL = AL.removeParamAttribute(Context, I, Attribute::Alignment);
+
+ auto ExistingRange = AL.getParamRange(I);
+ AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]);
+
+ // For range we use the exact intersection.
+ if (ExistingRange.has_value()) {
+ if (auto NewRange = ValidExactParamAttrs[ArgNo].getRange()) {
+ auto CombinedRange = ExistingRange->exactIntersectWith(*NewRange);
+ if (!CombinedRange.has_value())
+ CombinedRange =
+ ConstantRange::getEmpty(NewRange->getBitWidth());
+ AL = AL.removeParamAttribute(Context, I, Attribute::Range);
+ AL = AL.addRangeParamAttr(Context, I, *CombinedRange);
+ }
+ }
+ } else {
+ const Value *UnderlyingV =
+ getUnderlyingObject(InnerCB->getArgOperand(I));
+ Arg = dyn_cast<Argument>(UnderlyingV);
+ if (!Arg)
+ continue;
+ ArgNo = Arg->getArgNo();
+ }
- unsigned ArgNo = Arg->getArgNo();
// If so, propagate its access attributes.
- AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
+ AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
// We can have conflicting attributes from the inner callsite and
// to-be-inlined callsite. In that case, choose the most
// restrictive.
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index ffd31fbe8ae107..e25023da6ed5ff 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -46,7 +46,6 @@ define dso_local void @foo3_writable(ptr %p) {
ret void
}
-
define dso_local void @foo1_bar_aligned64_deref512(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@foo1_bar_aligned64_deref512
; CHECK-SAME: (ptr [[P:%.*]]) {
@@ -295,7 +294,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
define void @prop_deref(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr dereferenceable(16) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable(16) %p)
@@ -305,7 +304,7 @@ define void @prop_deref(ptr %p) {
define void @prop_deref_or_null(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr dereferenceable_or_null(256) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable_or_null(256) %p)
@@ -315,17 +314,27 @@ define void @prop_deref_or_null(ptr %p) {
define void @prop_param_nonnull_and_align(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr nonnull align 32 [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr nonnull align 32 %p)
ret void
}
+define void @prop_param_nofree_and_align(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: call void @bar1(ptr nofree align 32 [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @foo1(ptr nofree align 32 %p)
+ ret void
+}
+
define void @prop_param_deref_align_no_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
+; CHECK-NEXT: call void @bar1(ptr align 4 dereferenceable(64) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned64_deref512(ptr align 4 dereferenceable(64) %p)
@@ -335,7 +344,7 @@ define void @prop_param_deref_align_no_update(ptr %p) {
define void @prop_param_deref_align_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
+; CHECK-NEXT: call void @bar1(ptr align 128 dereferenceable(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) %p)
@@ -345,7 +354,7 @@ define void @prop_param_deref_align_update(ptr %p) {
define void @prop_param_deref_or_null_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
+; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(1024) %p)
@@ -355,7 +364,7 @@ define void @prop_param_deref_or_null_update(ptr %p) {
define void @prop_param_deref_or_null_no_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_no_update
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
+; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(32) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(32) %p)
@@ -528,7 +537,6 @@ define void @prop_no_conflict_writable(ptr %p) {
ret void
}
-
define void @prop_no_conflict_writable2(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_no_conflict_writable2
; CHECK-SAME: (ptr [[P:%.*]]) {
@@ -539,3 +547,107 @@ define void @prop_no_conflict_writable2(ptr %p) {
ret void
}
+declare void @bar4(i32)
+
+define dso_local void @foo4_range_0_10(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4_range_0_10
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @bar4(i32 range(i32 0, 10) %v)
+ ret void
+}
+
+define dso_local void @foo4_2_range_0_10(i32 range(i32 0, 10) %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4_2_range_0_10
+; CHECK-SAME: (i32 range(i32 0, 10) [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @bar4(i32 %v)
+ ret void
+}
+
+
+define dso_local void @foo4(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @bar4(i32 %v)
+ ret void
+}
+
+
+
+define void @prop_range_empty_intersect(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 0, 0) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4_range_0_10(i32 range(i32 11, 50) %v)
+ ret void
+}
+
+define void @prop_range_empty(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 1, 0) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4(i32 range(i32 1, 0) %v)
+ ret void
+}
+
+define void @prop_range_empty_with_intersect(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 1, 10) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4_range_0_10(i32 range(i32 1, 0) %v)
+ ret void
+}
+
+define void @prop_range_intersect1(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 0, 9) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4_range_0_10(i32 range(i32 0, 9) %v)
+ ret void
+}
+
+define void @prop_range_intersect2(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 1, 9) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4_range_0_10(i32 range(i32 1, 9) %v)
+ ret void
+}
+
+define void @prop_range_intersect3(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 0, 11) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v)
+ ret void
+}
+
+define void @prop_range_direct(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_direct
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT: call void @bar4(i32 range(i32 1, 11) [[V]])
+; CHECK-NEXT: ret void
+;
+ call void @foo4(i32 range(i32 1, 11) %v)
+ ret void
+}
diff --git a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
index 1a219a22019c43..c0943f4aefb8f9 100644
--- a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
+++ b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
@@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z)
define void @f(ptr %p, ptr %q, ptr %z) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) {
-; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]])
+; CHECK-NEXT: call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]])
; CHECK-NEXT...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops I forgot to submit my review comment :(
420da82
to
c63b635
Compare
|
||
// We can propagate both poison generating an UB generating attributes | ||
// without any extra checks. The only attribute that is tricky to propagate | ||
// is `noundef` (skipped for now) as that can create new UB where previous |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
noundef
should be safe to propagate. If we pass a poison/undef value into the callee, we must trigger an immediate UB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There definitely are cases we can propagate it, but since it need specialized logic I'm keeping separate from this patch.
Next patch hopefully can get no capture, then maybe noundef.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I misread, you can still create new UB in that noundef
then poison-generating attribute is fine, but combined they create immediate UB i.e: https://alive2.llvm.org/ce/z/CgBHpb
c63b635
to
e8e15d6
Compare
if (!CombinedRange.has_value()) | ||
CombinedRange = | ||
ConstantRange::getEmpty(NewRange->getBitWidth()); | ||
AL = AL.removeParamAttribute(Context, I, Attribute::Range); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this to make it more clear what is happening that removeParamAttribute is called? as addRangeParamAttr will replace the current value there is no need to call removeParamAttribute.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm? Thought add param honors the existing parameter. It does for all other attrs at least.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm if I have not missed something in the end this will be called
llvm-project/llvm/lib/IR/Attributes.cpp
Lines 1782 to 1790 in 3591da9
template <typename K> | |
static void addAttributeImpl(SmallVectorImpl<Attribute> &Attrs, K Kind, | |
Attribute Attr) { | |
auto It = lower_bound(Attrs, Kind, AttributeComparator()); | |
if (It != Attrs.end() && It->hasAttribute(Kind)) | |
std::swap(*It, Attr); | |
else | |
Attrs.insert(It, Attr); | |
} |
so swap will be called and replace the current value or?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm if I have not missed something in the end this will be called
llvm-project/llvm/lib/IR/Attributes.cpp
Lines 1782 to 1790 in 3591da9
template <typename K> static void addAttributeImpl(SmallVectorImpl<Attribute> &Attrs, K Kind, Attribute Attr) { auto It = lower_bound(Attrs, Kind, AttributeComparator()); if (It != Attrs.end() && It->hasAttribute(Kind)) std::swap(*It, Attr); else Attrs.insert(It, Attr); } so swap will be called and replace the current value or?
Its from the Merge
call in addAttributesAtIndex
.
// For range we use the exact intersection. | ||
if (ExistingRange.has_value()) { | ||
if (auto NewRange = ValidExactParamAttrs[ArgNo].getRange()) { | ||
auto CombinedRange = ExistingRange->exactIntersectWith(*NewRange); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason to poison all values if exactIntersectWith is returning nullopt? is it not better to remove the range attribute instead of setting an empty range? or selecting the smallest range as is done by calling intersectWith
feels like it is possible to assume that both ranges are valid only that the intersection gives multiple ranges.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well empty range implies that the incoming value is always poison. Think its useful to preserve that information.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But is it an empty range? if eg the ranges is [30, 20) and [10, 40) values in the ranges [10,20) and [30,40) are not poison but exactIntersectWith will return nullopt. I Think that it is possible to get ranges like that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh that buggy, let me fix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed + tests
e8e15d6
to
79c820c
Compare
79c820c
to
a80d5c0
Compare
a80d5c0
to
d519556
Compare
rebased |
ping |
1 similar comment
ping |
d519556
to
86e5007
Compare
Rebased (after we remove |
ping |
The invoke merging problem still exists, though the impact dropped:
Need to adjust this:
|
f1ecf0b
to
560bd05
Compare
560bd05
to
60cbba6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
NewAB.removeAttribute(Attribute::Range); | ||
NewAB.addRangeAttr(CombinedRange); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, looking at this code again, shouldn't this be querying the attributes on the CallBase instead of the AttributeList? For example, if you have an existing dereferenceable(8)
on the function definition and nothing on the call-site, then this could infer dereferenceable(4)
at the call-site, which would take precedence over the definition. Though now that I check the implementation of getParamDereferenceableBytes(), it doesn't actually implement the usual fallback to inspecting the attributes on the callee. So for now checking AL is probably fine...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch. I think this actually implies that we have a bug with out byval
handling and should be fixed.
Take:
https://godbolt.org/z/Wj9na57nn
we are propagating readonly
to byval
, but not picking up byval
because its only checking the callbase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i will post a fix later tonight.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than the bug with ByVal
, I would say that in InstCombine or something we should remove redundant attrs from callbases rather than require yet more special case handling for callbase attrs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Going to merge this (assuming no objections to the above comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, what is your opposition to just cleaning it up explicitly in one place as opposed to having to manage the special cases everywhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code is expected to query attributes on CallBase, not the underlying attribute list. If everything works on CallBase, and CallBase implements attribute inheritance correctly, there should not be any special cases relative to the current code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed in query, but not in setting. Seems like a generally easier to design to write whatever is correct to callbase attr list, then let some later pass handle optimizing it properly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure I follow. You are already querying the existing attributes here, what difference does it make in terms of complexity to query them on CallBase instead of AttributeList?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, as long we write the CallBase
access APIs to take the best of whats on the Function and CallBase its probably fine. Ill work on updating.
Add support for propagating: - `derefereancable` - `derefereancable_or_null` - `align` - `nonnull` These are only propagated if the parameter to the to-be-inlined callsite match the exact parameter used in the to-be-inlined function.
577928b
to
36d5ca0
Compare
llvm/llvm-project#91101 propagates range information across inlining, resulting in more metadata in this test. Tolerate the range metadata if it appears.
This causes broken IR:
|
I've reverted this in 9e6d24f |
Think root cause is a bug in InstCombine where we narrow type but don't update the type of the |
llvm: Tolerate propagated range metadata llvm/llvm-project#91101 propagates range information across inlining, resulting in more metadata in this test. Tolerate the range metadata if it appears. ``@rustbot:`` label +llvm-main r? ``@durin42`` Please wait a moment before approving, putting the llvm-main tag on it to make sure it fixes the integration test.
- **[Inliner] Add tests for propagating more parameter attributes; NFC** - **[Inliner] Propagate more attributes to params when inlining** Add support for propagating: - `derefereancable` - `derefereancable_or_null` - `align` - `nonnull` - `range` These are only propagated if the parameter to the to-be-inlined callsite match the exact parameter used in the to-be-inlined function.
…lvm#91101)" This reverts commit ae778ae. Creates broken IR, see comments in llvm#91101.
Rollup merge of rust-lang#131798 - maurer:range-inlining, r=durin42 llvm: Tolerate propagated range metadata llvm/llvm-project#91101 propagates range information across inlining, resulting in more metadata in this test. Tolerate the range metadata if it appears. ``@rustbot:`` label +llvm-main r? ``@durin42`` Please wait a moment before approving, putting the llvm-main tag on it to make sure it fixes the integration test.
…lvm#91101)" (2nd Attempt) Root cause of the bug was code hanging onto `range` attr after changing BitWidth. This was fixed in PR llvm#112633.
…lvm#91101)" (2nd Attempt) (llvm#112749) Root cause of the bug was code hanging onto `range` attr after changing BitWidth. This was fixed in PR llvm#112633.
- **[Inliner] Add tests for propagating more parameter attributes; NFC** - **[Inliner] Propagate more attributes to params when inlining** Add support for propagating: - `derefereancable` - `derefereancable_or_null` - `align` - `nonnull` - `range` These are only propagated if the parameter to the to-be-inlined callsite match the exact parameter used in the to-be-inlined function.
…lvm#91101)" This reverts commit ae778ae. Creates broken IR, see comments in llvm#91101.
…lvm#91101)" (2nd Attempt) (llvm#112749) Root cause of the bug was code hanging onto `range` attr after changing BitWidth. This was fixed in PR llvm#112633.
range
attributes to params when inliningAdd support for propagating:
-
derefereancable
-
derefereancable_or_null
-
align
-
nonnull
-
range
These are only propagated if the parameter to the to-be-inlined callsite match the exact parameter used in the to-be-inlined function.