Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
ScalableOffset = -ScalableOffset;
if (all_of(N->users(), [&](SDNode *Node) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
LoadStore && LoadStore->getBasePtr().getNode() == N) {
LoadStore && LoadStore->hasUniqueMemOperand() &&
LoadStore->getBasePtr().getNode() == N) {
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.ScalableOffset = ScalableOffset;
Expand Down Expand Up @@ -1183,6 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,

for (SDNode *Node : N->users()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
if (!LoadStore->hasUniqueMemOperand())
continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing check lines

; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
; crash when a MemSDNode user has multiple memory operands (e.g.
; buffer_load_lds, which reads from a buffer and writes to LDS).

@global_smem = external addrspace(3) global [0 x i8], align 16

declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32)
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()

; CHECK-LABEL: buffer_load_lds_reassociate_offsets:
; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
; Kernel that issues two buffer-to-LDS loads whose i32 offset operands are
; built from nested constant adds on top of a shared sub-expression. The
; FileCheck patterns preceding this function expect one load instruction per
; intrinsic call below.
define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Create a pattern that is a candidate for reassociation:
;   (add (add base, 1024), 32)
; where base is the product of the work-item id and 1536. Both final offsets
; reuse %add1 and differ only in the outer constant (32 vs. 33), so the inner
; add has more than one user.
%base = mul i32 %tid, 1536
%add1 = add i32 %base, 1024
%offset1 = add i32 %add1, 32
%offset2 = add i32 %add1, 33
; Double each offset before it is passed to the load intrinsic.
%shl1 = shl i32 %offset1, 1
%shl2 = shl i32 %offset2, 1
; Build the buffer resource from the kernel's pointer argument.
; NOTE(review): the constants (i16 0, i64 2147483646, i32 159744) are presumably
; stride, number of records and flags -- confirm against the intrinsic docs.
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744)
; Two destinations inside @global_smem (addrspace(3)), at byte offsets 0 and 1056.
%lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0
%lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056
; One load per destination; %shl1 / %shl2 are the only operands that differ
; besides the destination pointer. The literal 16 is presumably the transfer
; size in bytes (consistent with the dwordx4 loads expected above) -- confirm.
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0)
ret void
}