Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release/18.x: [GlobalISel] Fix store merging incorrectly classifying an unknown index expr as 0. (#90375) #90673

Merged
merged 1 commit into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,23 @@ struct LegalityQuery;
class MachineRegisterInfo;
namespace GISelAddressing {
/// Helper struct to store a base, index and offset that forms an address
struct BaseIndexOffset {
class BaseIndexOffset {
private:
Register BaseReg;
Register IndexReg;
int64_t Offset = 0;
bool IsIndexSignExt = false;
std::optional<int64_t> Offset;

public:
BaseIndexOffset() = default;
Register getBase() { return BaseReg; }
Register getBase() const { return BaseReg; }
Register getIndex() { return IndexReg; }
Register getIndex() const { return IndexReg; }
void setBase(Register NewBase) { BaseReg = NewBase; }
void setIndex(Register NewIndex) { IndexReg = NewIndex; }
void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
bool hasValidOffset() const { return Offset.has_value(); }
int64_t getOffset() const { return *Offset; }
};

/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
Expand Down Expand Up @@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
// order stores are writing to incremeneting consecutive addresses. So when
// we walk the block in reverse order, the next eligible store must write to
// an offset one store width lower than CurrentLowestOffset.
uint64_t CurrentLowestOffset;
int64_t CurrentLowestOffset;
SmallVector<GStore *> Stores;
// A vector of MachineInstr/unsigned pairs to denote potential aliases that
// need to be checked before the candidate is considered safe to merge. The
Expand Down
48 changes: 28 additions & 20 deletions llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
MachineRegisterInfo &MRI) {
BaseIndexOffset Info;
Register PtrAddRHS;
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
Info.BaseReg = Ptr;
Info.IndexReg = Register();
Info.IsIndexSignExt = false;
Register BaseReg;
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
Info.setBase(Ptr);
Info.setOffset(0);
return Info;
}

Info.setBase(BaseReg);
auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
if (RHSCst)
Info.Offset = RHSCst->Value.getSExtValue();
Info.setOffset(RHSCst->Value.getSExtValue());

// Just recognize a simple case for now. In future we'll need to match
// indexing patterns for base + index + constant.
Info.IndexReg = PtrAddRHS;
Info.IsIndexSignExt = false;
Info.setIndex(PtrAddRHS);
return Info;
}

Expand All @@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
return false;

int64_t Size1 = LdSt1->getMemSize();
int64_t Size2 = LdSt2->getMemSize();

int64_t PtrDiff;
if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
BasePtr1.hasValidOffset()) {
PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
// If the size of memory access is unknown, do not use it to do analysis.
// One example of unknown size memory access is to load/store scalable
// vector objects on the stack.
Expand Down Expand Up @@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
if (!Base0Def || !Base1Def)
return false; // Couldn't tell anything.

Expand Down Expand Up @@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,

Register StoreAddr = StoreMI.getPointerReg();
auto BIO = getPointerInfo(StoreAddr, *MRI);
Register StoreBase = BIO.BaseReg;
uint64_t StoreOffCst = BIO.Offset;
Register StoreBase = BIO.getBase();
if (C.Stores.empty()) {
C.BasePtr = StoreBase;
if (!BIO.hasValidOffset()) {
C.CurrentLowestOffset = 0;
} else {
C.CurrentLowestOffset = BIO.getOffset();
}
// This is the first store of the candidate.
// If the offset can't possibly allow for a lower addressed store with the
// same base, don't bother adding it.
if (StoreOffCst < ValueTy.getSizeInBytes())
if (BIO.hasValidOffset() &&
BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
return false;
C.BasePtr = StoreBase;
C.CurrentLowestOffset = StoreOffCst;
C.Stores.emplace_back(&StoreMI);
LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
<< StoreMI);
Expand All @@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
// writes to the next lowest adjacent address.
if (C.BasePtr != StoreBase)
return false;
if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
static_cast<uint64_t>(StoreOffCst))
// If we don't have a valid offset, we can't guarantee to be an adjacent
// offset.
if (!BIO.hasValidOffset())
return false;
if ((C.CurrentLowestOffset -
static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
return false;

// This writes to an adjacent address. Allow it.
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
store i32 14, ptr %addr4
ret i32 %safeld
}

@G = external global [10 x i32]

define void @invalid_zero_offset_no_merge(i64 %0) {
; CHECK-LABEL: invalid_zero_offset_no_merge:
; CHECK: ; %bb.0:
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, _G@GOTPAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr x8, [x8, _G@GOTPAGEOFF]
; CHECK-NEXT: str wzr, [x8, x0, lsl #2]
; CHECK-NEXT: str wzr, [x8, #4]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
store i32 0, ptr %2, align 4
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
ret void
}
62 changes: 52 additions & 10 deletions llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@
ret void
}

@G = external global [10 x i32]
define void @invalid_zero_offset_no_merge(i64 %0) {
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
store i32 0, ptr %2, align 4
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
ret void
}
...
---
name: test_simple_2xs8
Expand Down Expand Up @@ -582,13 +589,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# The store to ptr2 prevents merging into a single store.
# We can still merge the stores into addr1 and addr2.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1

; The store to ptr2 prevents merging into a single store.
; We can still merge the stores into addr1 and addr2.

; CHECK-LABEL: name: test_alias_4xs16
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -639,10 +644,10 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# Here store of 5 and 9 can be merged, others have aliasing barriers.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2
; Here store of 5 and 9 can be merged, others have aliasing barriers.
; CHECK-LABEL: name: test_alias2_4xs16
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -698,12 +703,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# No merging can be done here.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2, $x3

; No merging can be done here.

; CHECK-LABEL: name: test_alias3_4xs16
; CHECK: liveins: $x0, $x1, $x2, $x3
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -767,12 +771,10 @@ stack:
- { id: 0, name: a1, size: 24, alignment: 4 }
- { id: 1, name: a2, size: 4, alignment: 4 }
machineFunctionInfo: {}
# Can merge because the load is from a different alloca and can't alias.
body: |
bb.1 (%ir-block.0):
liveins: $x0

; Can merge because the load is from a different alloca and can't alias.

; CHECK-LABEL: name: test_alias_allocas_2xs32
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -826,3 +828,43 @@ body: |
RET_ReallyLR

...
---
name: invalid_zero_offset_no_merge
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.1):
liveins: $x0

; CHECK-LABEL: name: invalid_zero_offset_no_merge
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
; CHECK-NEXT: RET_ReallyLR
%0:_(s64) = COPY $x0
%9:_(s64) = G_CONSTANT i64 2
%3:_(s64) = G_SHL %0, %9(s64)
%1:_(p0) = G_GLOBAL_VALUE @G
%4:_(p0) = G_PTR_ADD %1, %3(s64)
%6:_(s32) = G_CONSTANT i32 0
G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
%8:_(s64) = G_CONSTANT i64 4
%7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
RET_ReallyLR

...
Loading