diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 58c59628342c7..c43acb1780d37 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1145,7 +1145,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, ScalableOffset = -ScalableOffset; if (all_of(N->users(), [&](SDNode *Node) { if (auto *LoadStore = dyn_cast<MemSDNode>(Node); - LoadStore && LoadStore->getBasePtr().getNode() == N) { + LoadStore && LoadStore->hasUniqueMemOperand() && + LoadStore->getBasePtr().getNode() == N) { TargetLoweringBase::AddrMode AM; AM.HasBaseReg = true; AM.ScalableOffset = ScalableOffset; @@ -1183,6 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, for (SDNode *Node : N->users()) { if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) { + if (!LoadStore->hasUniqueMemOperand()) + continue; // Is x[offset2] already not a legal addressing mode? If so then // reassociating the constants breaks nothing (we test offset2 because // that's the one we hope to fold into the load or store). diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll new file mode 100644 index 0000000000000..55159634eb4e5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s + +; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not +; crash when a MemSDNode user has multiple memory operands (e.g. +; buffer_load_lds which reads from a buffer and writes to LDS).
+ +@global_smem = external addrspace(3) global [0 x i8], align 16 + +declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32) +declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32) +declare i32 @llvm.amdgcn.workitem.id.x() + +define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) { +; CHECK-LABEL: buffer_load_lds_reassociate_offsets: +; CHECK: ; %bb.1: +; CHECK-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_branch .LBB0_0 +; CHECK-NEXT: .p2align 8 +; CHECK-NEXT: ; %bb.2: +; CHECK-NEXT: .LBB0_0: +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: v_mul_u32_u24_e32 v0, 0x600, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_add_u32_e32 v1, 0x840, v0 +; CHECK-NEXT: s_and_b32 s9, s9, 0xffff +; CHECK-NEXT: s_mov_b32 s11, 0x27000 +; CHECK-NEXT: s_mov_b32 s10, 0x7ffffffe +; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: v_add_u32_e32 v0, 0x842, v0 +; CHECK-NEXT: buffer_load_dwordx4 v1, s[8:11], 0 offen lds +; CHECK-NEXT: s_add_i32 m0, 0, 0x420 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_dwordx4 v0, s[8:11], 0 offen lds +; CHECK-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + ; Create a pattern that will be reassociated: (add (add base, 1024), 32) + ; where base comes from mul, creating nested adds + %base = mul i32 %tid, 1536 + %add1 = add i32 %base, 1024 + %offset1 = add i32 %add1, 32 + %offset2 = add i32 %add1, 33 + %shl1 = shl i32 %offset1, 1 + %shl2 = shl i32 %offset2, 1 + %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744) + %lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0 + %lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056 + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, 
i32 %shl1, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0) + ret void +}