[DAGCombiner] Fix crash in reassociationCanBreakAddressingModePattern for multi-memop nodes#180268
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Alexander Weinrauch (AlexAUT) ChangesTwo code paths in
Full diff: https://github.com/llvm/llvm-project/pull/180268.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 58c59628342c7..c43acb1780d37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1145,7 +1145,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
ScalableOffset = -ScalableOffset;
if (all_of(N->users(), [&](SDNode *Node) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
- LoadStore && LoadStore->getBasePtr().getNode() == N) {
+ LoadStore && LoadStore->hasUniqueMemOperand() &&
+ LoadStore->getBasePtr().getNode() == N) {
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.ScalableOffset = ScalableOffset;
@@ -1183,6 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
for (SDNode *Node : N->users()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ if (!LoadStore->hasUniqueMemOperand())
+ continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
new file mode 100644
index 0000000000000..f19eeca3065ec
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
+
+; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
+; crash when a MemSDNode user has multiple memory operands (e.g.
+; buffer_load_lds which reads from a buffer and writes to LDS).
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32)
+declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+define amdgpu_kernel void @triton_mm_minimal(ptr addrspace(1) inreg %ptr) {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ ; Create a pattern that will be reassociated: (add (add base, 1024), 32)
+ ; where base comes from mul, creating nested adds
+ %base = mul i32 %tid, 1536
+ %add1 = add i32 %base, 1024
+ %offset1 = add i32 %add1, 32
+ %offset2 = add i32 %add1, 33
+ %shl1 = shl i32 %offset1, 1
+ %shl2 = shl i32 %offset2, 1
+ %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744)
+ %lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0
+ %lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0)
+ ret void
+}
|
|
@llvm/pr-subscribers-llvm-selectiondag Author: Alexander Weinrauch (AlexAUT) ChangesTwo code paths in
Full diff: https://github.com/llvm/llvm-project/pull/180268.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 58c59628342c7..c43acb1780d37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1145,7 +1145,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
ScalableOffset = -ScalableOffset;
if (all_of(N->users(), [&](SDNode *Node) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
- LoadStore && LoadStore->getBasePtr().getNode() == N) {
+ LoadStore && LoadStore->hasUniqueMemOperand() &&
+ LoadStore->getBasePtr().getNode() == N) {
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.ScalableOffset = ScalableOffset;
@@ -1183,6 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
for (SDNode *Node : N->users()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ if (!LoadStore->hasUniqueMemOperand())
+ continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
new file mode 100644
index 0000000000000..f19eeca3065ec
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
+
+; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
+; crash when a MemSDNode user has multiple memory operands (e.g.
+; buffer_load_lds which reads from a buffer and writes to LDS).
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32)
+declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+define amdgpu_kernel void @triton_mm_minimal(ptr addrspace(1) inreg %ptr) {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ ; Create a pattern that will be reassociated: (add (add base, 1024), 32)
+ ; where base comes from mul, creating nested adds
+ %base = mul i32 %tid, 1536
+ %add1 = add i32 %base, 1024
+ %offset1 = add i32 %add1, 32
+ %offset2 = add i32 %add1, 33
+ %shl1 = shl i32 %offset1, 1
+ %shl2 = shl i32 %offset2, 1
+ %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744)
+ %lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0
+ %lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0)
+ ret void
+}
|
arsenm
left a comment
There was a problem hiding this comment.
Maybe fine as a crash fix but this is more conservative than necessary. This should still be able to detect if the multiple mem operand case breaks the addressing mode
| @@ -0,0 +1,29 @@ | |||
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s | |||
There was a problem hiding this comment.
Don't disable global isel unless a test is also running global isel
| @@ -0,0 +1,29 @@ | |||
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s | |||
|
|
|||
Looks like these cases were missed in #175843. |
nhaehnle
left a comment
There was a problem hiding this comment.
LGTM
(I think using auto-generated check lines would be best)
This pulls in: - llvm/llvm-project#179305 - llvm/llvm-project#180220 - llvm/llvm-project#180268 `NoInfsFPMath` got removed in llvm/llvm-project#180083
This pulls in: - llvm/llvm-project#179305 - llvm/llvm-project#180220 - llvm/llvm-project#180268 `NoInfsFPMath` got removed in llvm/llvm-project#180083 (cherry picked from commit 552306f)
LLVM bump cherry pick for rel/3.7, this improve stability across ROCm archs. This pulls in: - llvm/llvm-project#179305 - llvm/llvm-project#180220 - llvm/llvm-project#180268 `NoInfsFPMath` got removed in llvm/llvm-project#180083 (cherry picked from commit 552306f) <!--- The core Triton is a small number of people, and we receive many PRs (thank you!). To help us review your code more quickly, **if you are a new contributor (less than 3 PRs merged) we ask that you complete the following tasks and include the filled-out checklist in your PR description.** Complete the following tasks before sending your PR, and replace `[ ]` with `[x]` to indicate you have done them. --> # New contributor declaration - [ ] I am not making a trivial change, such as fixing a typo in a comment. - [ ] I have written a PR description following these [rules](https://cbea.ms/git-commit/#why-not-how). - [ ] I have run `pre-commit run --from-ref origin/main --to-ref HEAD`. - Select one of the following. - [ ] I have added tests. - `/test` for `lit` tests - `/unittest` for C++ tests - `/python/test` for end-to-end tests - [ ] This PR does not need a test because `FILL THIS IN`. - Select one of the following. - [ ] I have not added any `lit` tests. - [ ] The `lit` tests I have added follow these [best practices](https://mlir.llvm.org/getting_started/TestingGuide/#filecheck-best-practices), including the "tests should be minimal" section. (Usually running Python code and using the instructions it generates is not minimal.) Co-authored-by: Alexander Weinrauch <alexander.weinrauch@amd.com>
This pulls in: - llvm/llvm-project#179305 - llvm/llvm-project#180220 - llvm/llvm-project#180268 `NoInfsFPMath` got removed in llvm/llvm-project#180083
This pulls in: - llvm/llvm-project#179305 - llvm/llvm-project#180220 - llvm/llvm-project#180268 `NoInfsFPMath` got removed in llvm/llvm-project#180083
Two code paths in
reassociationCanBreakAddressingModePatternwere missing ahasUniqueMemOperand()guard before callinggetAddressSpace(). Note that onL1214we already have the same guard in place.getAddressSpace()chains throughgetPointerInfo()togetMemOperand(), which asserts that the node has exactly one memory operand.