From e64adf283b40638dbd4058b53276d47830a72e33 Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 31 May 2025 03:04:06 +0000 Subject: [PATCH 1/9] [DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 46 ++++++++ .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 102 +++++------------- 2 files changed, 71 insertions(+), 77 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aba3c0f80a024..a37521e6b2690 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16736,6 +16736,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } } + // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z). + if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + N0.getOpcode() == ISD::FREEZE) { + SDValue FrozenMul = N0.getOperand(0); + if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { + SDValue X = FrozenMul.getOperand(0); + SDValue Y = FrozenMul.getOperand(1); + return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1); + } + } + + // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x) + if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + N1.getOpcode() == ISD::FREEZE) { + SDValue FrozenMul = N1.getOperand(0); + if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { + SDValue X = FrozenMul.getOperand(0); + SDValue Y = FrozenMul.getOperand(1); + return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0); + } + } + // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) @@ -17013,6 +17035,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } } + // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z)) + if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + N0.getOpcode() == ISD::FREEZE) { + SDValue FrozenMul = N0.getOperand(0); + if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { + SDValue X = FrozenMul.getOperand(0); + SDValue Y = FrozenMul.getOperand(1); + SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1); + return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ); + } + } + + // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z) + if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + N1.getOpcode() == ISD::FREEZE) { + SDValue FrozenMul = N1.getOperand(0); + if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { + SDValue X = FrozenMul.getOperand(0); + SDValue Y = FrozenMul.getOperand(1); + SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X); + return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0); + } + } + auto isReassociable = [&Options](SDNode *N) { return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); }; diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index a1b2dbda687fb..75fe67e743c03 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -1,106 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11 define float @fma_from_freeze_mul_add_left(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_add_left: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: fma_from_freeze_mul_add_left: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +bb: %mul = fmul contract float %x, %y %mul.fr = freeze float %mul %add = fadd contract float %mul.fr, 1.000000e+00 ret float %add } -define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul nnan contract afn float %x, %y - %mul.fr = freeze float %mul - %add = fadd nnan contract float %mul.fr, 1.000000e+00 - ret float %add -} - define float @fma_from_freeze_mul_add_right(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_add_right: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: fma_from_freeze_mul_add_right: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +bb: %mul = fmul contract float %x, %y %mul.fr = freeze float %mul %add = fadd contract float 1.000000e+00, %mul.fr ret float %add } -define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul nnan contract float %x, %y - %mul.fr = freeze float %mul - %add = fadd nnan contract float 1.000000e+00, %mul.fr - ret float %add -} - define float @fma_from_freeze_mul_sub_left(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_sub_left: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: fma_from_freeze_mul_sub_left: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +bb: %mul = fmul contract float %x, %y %mul.fr = freeze float %mul %sub = fsub contract float %mul.fr, 1.000000e+00 ret float %sub } -define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul nnan contract float %x, %y - %mul.fr = freeze float %mul - %sub = fsub nnan contract float %mul.fr, 1.000000e+00 - ret float %sub -} - define float @fma_from_freeze_mul_sub_right(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_sub_right: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: fma_from_freeze_mul_sub_right: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +bb: %mul = fmul contract float %x, %y %mul.fr = freeze float %mul %sub = fsub contract float 1.000000e+00, %mul.fr ret float %sub } - -define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { -; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul nnan contract float %x, %y - %mul.fr = freeze float %mul - %sub = fsub nnan contract float 1.000000e+00, %mul.fr - ret float %sub -} From 3811073658351b3107615febf8eaa2007054acc9 Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Wed, 4 Jun 2025 11:24:59 +0800 Subject: [PATCH 2/9] [DAGCombiner] Update lit test. --- .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 118 ++++++++++++------ 1 file changed, 81 insertions(+), 37 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index 75fe67e743c03..840361473a157 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -1,54 +1,98 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s define float @fma_from_freeze_mul_add_left(float %x, float %y) { -; GFX11-LABEL: fma_from_freeze_mul_add_left: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -bb: - %mul = fmul contract float %x, %y - %mul.fr = freeze float %mul - %add = fadd contract float %mul.fr, 1.000000e+00 +; CHECK-LABEL: fma_from_freeze_mul_add_left: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 + ret float %add +} + +define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) { +; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ret float %add } define float @fma_from_freeze_mul_add_right(float %x, float %y) { -; GFX11-LABEL: fma_from_freeze_mul_add_right: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -bb: - %mul = fmul contract float %x, %y - %mul.fr = freeze float %mul - %add = fadd contract float 1.000000e+00, %mul.fr +; CHECK-LABEL: fma_from_freeze_mul_add_right: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr + ret float %add +} + +define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) { +; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ret float %add } define float @fma_from_freeze_mul_sub_left(float %x, float %y) { -; GFX11-LABEL: fma_from_freeze_mul_sub_left: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -bb: - %mul = fmul contract float %x, %y - %mul.fr = freeze float %mul - %sub = fsub contract float %mul.fr, 1.000000e+00 +; CHECK-LABEL: fma_from_freeze_mul_sub_left: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 + ret float %sub +} + +define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) { +; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ret float %sub } define float @fma_from_freeze_mul_sub_right(float %x, float %y) { -; GFX11-LABEL: fma_from_freeze_mul_sub_right: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -bb: - %mul = fmul contract float %x, %y - %mul.fr = freeze float %mul - %sub = fsub contract float 1.000000e+00, %mul.fr +; CHECK-LABEL: fma_from_freeze_mul_sub_right: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr + ret float %sub +} + +define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { +; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul.fr = freeze float %mul + %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ret float %sub } From 591717e6d4b23176b4278002a0572b13758c6dbe Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Wed, 4 Jun 2025 12:49:00 +0800 Subject: [PATCH 3/9] [DAGCombiner] Update for hasNoSignedZeros. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a37521e6b2690..f07edf5021e0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16737,8 +16737,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z). - if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - N0.getOpcode() == ISD::FREEZE) { + bool CanContract = + (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()); + if (CanContract && N0.getOpcode() == ISD::FREEZE) { SDValue FrozenMul = N0.getOperand(0); if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { SDValue X = FrozenMul.getOperand(0); @@ -16748,8 +16750,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x) - if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - N1.getOpcode() == ISD::FREEZE) { + if (CanContract && N1.getOpcode() == ISD::FREEZE) { SDValue FrozenMul = N1.getOperand(0); if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { SDValue X = FrozenMul.getOperand(0); @@ -17036,8 +17037,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z)) - if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - N0.getOpcode() == ISD::FREEZE) { + bool CanContract = + (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && + (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()); + if (CanContract && N0.getOpcode() == ISD::FREEZE) { SDValue FrozenMul = N0.getOperand(0); if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { SDValue X = FrozenMul.getOperand(0); @@ -17048,8 +17051,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z) - if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - N1.getOpcode() == ISD::FREEZE) { + if (CanContract && N1.getOpcode() == ISD::FREEZE) { SDValue FrozenMul = N1.getOperand(0); if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { SDValue X = FrozenMul.getOperand(0); From dd0ab154e5cc5d4c510c56bc23e86fc7405503eb Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 10:36:00 +0000 Subject: [PATCH 4/9] [DAGCombiner] Allow freeze to sink through fmul by adding it to AllowMultipleMaybePoisonOperands --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65 ++++--------------- 1 file changed, 13 insertions(+), 52 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f07edf5021e0a..fa8d1bb2d78a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16392,12 +16392,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { return SDValue(); bool AllowMultipleMaybePoisonOperands = - N0.getOpcode() == ISD::SELECT_CC || - N0.getOpcode() == ISD::SETCC || + N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC || N0.getOpcode() == ISD::BUILD_VECTOR || N0.getOpcode() == ISD::BUILD_PAIR || N0.getOpcode() == ISD::VECTOR_SHUFFLE || - N0.getOpcode() == ISD::CONCAT_VECTORS; + N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL; // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all // ones" or "constant" into something that depends on FrozenUndef. We can @@ -16495,7 +16494,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { SVN->getMask()); } else { // NOTE: this strips poison generating flags. - R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan, + // ninf, nsz, or fast. + // However, contract, reassoc, afn, and arcp should be preserved, + // as these fast-math flags do not introduce poison values. + SDNodeFlags SrcFlags = N0->getFlags(); + SDNodeFlags SafeFlags; + SafeFlags.setAllowContract(SrcFlags.hasAllowContract()); + SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation()); + SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs()); + SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal()); + R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops, SafeFlags); } assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) && "Can't create node that may be undef/poison!"); @@ -16736,29 +16745,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } } - // fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z). - bool CanContract = - (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()); - if (CanContract && N0.getOpcode() == ISD::FREEZE) { - SDValue FrozenMul = N0.getOperand(0); - if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { - SDValue X = FrozenMul.getOperand(0); - SDValue Y = FrozenMul.getOperand(1); - return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1); - } - } - - // fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x) - if (CanContract && N1.getOpcode() == ISD::FREEZE) { - SDValue FrozenMul = N1.getOperand(0); - if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { - SDValue X = FrozenMul.getOperand(0); - SDValue Y = FrozenMul.getOperand(1); - return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0); - } - } - // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) @@ -17036,31 +17022,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } } - // fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z)) - bool CanContract = - (Options.UnsafeFPMath || N->getFlags().hasAllowContract()) && - (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()); - if (CanContract && N0.getOpcode() == ISD::FREEZE) { - SDValue FrozenMul = N0.getOperand(0); - if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { - SDValue X = FrozenMul.getOperand(0); - SDValue Y = FrozenMul.getOperand(1); - SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1); - return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ); - } - } - - // fold (fsub z, (freeze(fmul x, y))) -> (fma (fneg x), y, z) - if (CanContract && N1.getOpcode() == ISD::FREEZE) { - SDValue FrozenMul = N1.getOperand(0); - if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) { - SDValue X = FrozenMul.getOperand(0); - SDValue Y = FrozenMul.getOperand(1); - SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X); - return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0); - } - } - auto isReassociable = [&Options](SDNode *N) { return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); }; From e731c530e17f07777cc77c9fd092f6e5d8cf49d1 Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 10:38:10 +0000 Subject: [PATCH 5/9] [DAGCombiner] Update test. --- .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index 840361473a157..a27b2920c73eb 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -7,9 +7,9 @@ define float @fma_from_freeze_mul_add_left(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul = fmul contract float %x, %y %mul.fr = freeze float %mul - %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 + %add = fadd contract float %mul.fr, 1.000000e+00 ret float %add } @@ -19,9 +19,9 @@ define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul = fmul nnan contract afn float %x, %y %mul.fr = freeze float %mul - %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 + %add = fadd nnan contract float %mul.fr, 1.000000e+00 ret float %add } @@ -31,9 +31,9 @@ define float @fma_from_freeze_mul_add_right(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul = fmul contract float %x, %y %mul.fr = freeze float %mul - %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr + %add = fadd contract float 1.000000e+00, %mul.fr ret float %add } @@ -43,9 +43,9 @@ define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul - %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr + %add = fadd nnan contract float 1.000000e+00, %mul.fr ret float %add } @@ -55,9 +55,9 @@ define float @fma_from_freeze_mul_sub_left(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul = fmul contract float %x, %y %mul.fr = freeze float %mul - %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 + %sub = fsub contract float %mul.fr, 1.000000e+00 ret float %sub } @@ -67,9 +67,9 @@ define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul - %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 + %sub = fsub nnan contract float %mul.fr, 1.000000e+00 ret float %sub } @@ -79,9 +79,9 @@ define float @fma_from_freeze_mul_sub_right(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nsz arcp contract afn float %x, %y + %mul = fmul contract float %x, %y %mul.fr = freeze float %mul - %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr + %sub = fsub contract float 1.000000e+00, %mul.fr ret float %sub } @@ -91,8 +91,8 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y + %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul - %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr + %sub = fsub nnan contract float 1.000000e+00, %mul.fr ret float %sub } From 728c44bfe088ca7feb8b8f8e34043ff2806ffbce Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 12:21:45 +0000 Subject: [PATCH 6/9] [DAGCombiner] Add a new test. --- .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index a27b2920c73eb..d762f75ccad26 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -96,3 +96,35 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { %sub = fsub nnan contract float 1.000000e+00, %mul.fr ret float %sub } + +define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) { +; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_dual_subrev_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, 1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %fadd_x = fsub reassoc nnan nsz arcp contract float %x, 1.000000e+00 + %fadd_y = fadd reassoc nnan nsz arcp contract float %y, 1.000000e+00 + %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y + %mul.fr = freeze float %mul + %sub = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 + ret float %sub +} + +define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) { +; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_sub: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, -1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %fadd_x = fadd reassoc nnan nsz arcp contract float %x, 1.000000e+00 + %fadd_y = fsub reassoc nnan nsz arcp contract float %y, 1.000000e+00 + %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y + %mul.fr = freeze float %mul + %sub = fsub reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 + ret float %sub +} From 7eabd44b19b83f20f7ad8cbfce3f28b94f8b6ef6 Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 12:25:28 +0000 Subject: [PATCH 7/9] [DAGCombiner] Update test name. --- llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index d762f75ccad26..de59f69499897 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -105,9 +105,9 @@ define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %fadd_x = fsub reassoc nnan nsz arcp contract float %x, 1.000000e+00 + %fsub_x = fsub reassoc nnan nsz arcp contract float %x, 1.000000e+00 %fadd_y = fadd reassoc nnan nsz arcp contract float %y, 1.000000e+00 - %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y + %mul = fmul reassoc nnan nsz arcp contract afn float %fsub_x, %fadd_y %mul.fr = freeze float %mul %sub = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 ret float %sub @@ -122,8 +122,8 @@ define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %fadd_x = fadd reassoc nnan nsz arcp contract float %x, 1.000000e+00 - %fadd_y = fsub reassoc nnan nsz arcp contract float %y, 1.000000e+00 - %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fadd_y + %fsub_y = fsub reassoc nnan nsz arcp contract float %y, 1.000000e+00 + %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fsub_y %mul.fr = freeze float %mul %sub = fsub reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 ret float %sub From 11046e9a74a63234829a29f808c6f9b3ed47da1e Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 12:27:58 +0000 Subject: [PATCH 8/9] [DAGCombiner] Update test name again. --- llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index de59f69499897..51084ead7e40b 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -109,8 +109,8 @@ define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) %fadd_y = fadd reassoc nnan nsz arcp contract float %y, 1.000000e+00 %mul = fmul reassoc nnan nsz arcp contract afn float %fsub_x, %fadd_y %mul.fr = freeze float %mul - %sub = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 - ret float %sub + %add = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 + ret float %add } define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) { From ecf76a2dd39c392bc685dc66025e39f1b4a17995 Mon Sep 17 00:00:00 2001 From: Harrison Hao Date: Sat, 7 Jun 2025 12:33:32 +0000 Subject: [PATCH 9/9] [DAGCombiner] Update test name again. --- .../CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index 51084ead7e40b..9a4ab9865369f 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -105,11 +105,11 @@ define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %fsub_x = fsub reassoc nnan nsz arcp contract float %x, 1.000000e+00 - %fadd_y = fadd reassoc nnan nsz arcp contract float %y, 1.000000e+00 - %mul = fmul reassoc nnan nsz arcp contract afn float %fsub_x, %fadd_y + %fsub_x = fsub nnan contract float %x, 1.000000e+00 + %fadd_y = fadd nnan contract float %y, 1.000000e+00 + %mul = fmul nnan contract float %fsub_x, %fadd_y %mul.fr = freeze float %mul - %add = fadd reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 + %add = fadd nnan contract float %mul.fr, 1.000000e+00 ret float %add } @@ -121,10 +121,10 @@ define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %fadd_x = fadd reassoc nnan nsz arcp contract float %x, 1.000000e+00 - %fsub_y = fsub reassoc nnan nsz arcp contract float %y, 1.000000e+00 - %mul = fmul reassoc nnan nsz arcp contract afn float %fadd_x, %fsub_y + %fadd_x = fadd nnan contract float %x, 1.000000e+00 + %fsub_y = fsub nnan contract float %y, 1.000000e+00 + %mul = fmul nnan contract float %fadd_x, %fsub_y %mul.fr = freeze float %mul - %sub = fsub reassoc nsz arcp contract afn contract float %mul.fr, 1.000000e+00 + %sub = fsub nnan contract float %mul.fr, 1.000000e+00 ret float %sub }