From 7d416508364a22b18c01a67d5dcaa151e9509fd8 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 18 Jan 2026 01:46:45 +0900 Subject: [PATCH] [AArch64] Fix fmaxnm/fminnm SNAN handling in min/max combine AArch64's fmaxnm/fminnm instructions convert a signaling NaN (SNAN) operand to a quiet NaN (QNAN), unlike an fcmp+select sequence. Extend the isProfitableToCombineMinNumMaxNum hook with the operands, node flags and SelectionDAG so that AArch64 can prevent the combine unless SNAN is proven absent or the nnan flag is set. Also mark scalar f32/f64 FMINNUM/FMAXNUM (and their _IEEE variants) as Expand on AArch64, drop the corresponding fmaxnm/fminnm selection patterns, and let the combine's no-signed-zeros check also accept the global NoSignedZerosFPMath option. --- llvm/include/llvm/CodeGen/TargetLowering.h | 7 ++++- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++++-- .../Target/AArch64/AArch64ISelLowering.cpp | 30 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 23 +++----------- 5 files changed, 51 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 0c185ed0c4e24..ca34c30011841 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2522,7 +2522,12 @@ class LLVM_ABI TargetLoweringBase { Action != TypeSplitVector; } - virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } + virtual bool isProfitableToCombineMinNumMaxNum(EVT VT, SDValue LHS, + SDValue RHS, + const SDNodeFlags &Flags, + SelectionDAG &DAG) const { + return true; + } /// Return true if a select of constants (select Cond, C1, C2) should be /// transformed into simple math ops with the condition value. 
For example: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4567b4017a7e1..ec556de6b2ce5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12015,8 +12015,13 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, if (!VT.isFloatingPoint()) return false; - return Flags.hasNoSignedZeros() && - TLI.isProfitableToCombineMinNumMaxNum(VT) && + const TargetOptions &Options = DAG.getTarget().Options; + + // The target can decide whether to combine based on value types, operands, + // flags, and NaN analysis. This allows targets like AArch64 to implement + // specific logic for handling NaN semantics of their min/max instructions. + return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) && + TLI.isProfitableToCombineMinNumMaxNum(VT, LHS, RHS, Flags, DAG) && (Flags.hasNoNaNs() || (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS))); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d550dbaf40a4d..1464a0dc9adc9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -949,6 +949,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::f16, Legal); } + // fmaxnm/fminnm convert SNAN to QNAN, unlike fcmp+select. + // Expand scalar types to preserve SNAN semantics. NOTE(review): only f32/f64 are marked Expand here, while this patch also removes the f16 fminnum_ieee/fmaxnum_ieee and v1f64 fminnum/fmaxnum patterns from AArch64InstrInfo.td - confirm f16 and v1f64 min/max still select. 
+ setOperationAction(ISD::FMINNUM, MVT::f32, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); + setOperationAction(ISD::FMINNUM, MVT::f64, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Expand); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Expand); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Expand); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); @@ -17699,6 +17710,25 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { I->getFastMathFlags().allowContract())); } +bool AArch64TargetLowering::isProfitableToCombineMinNumMaxNum( + EVT VT, SDValue LHS, SDValue RHS, const SDNodeFlags &Flags, + SelectionDAG &DAG) const { + // Vector types: always allow combination (vectors use different instructions). NOTE(review): confirm the vector min/max instructions do not quiet SNAN in the same way. + if (VT.isVector()) + return true; + + // Explicit nnan flag: safe to optimize + if (Flags.hasNoNaNs()) + return true; + + // fmaxnm/fminnm convert SNAN to QNAN, unlike fcmp+select. + // Only combine if we can prove no SNAN is present. NOTE(review): the DAGCombiner caller (isLegalToCombineMinNumMaxNum) additionally requires nnan or isKnownNeverNaN on both operands - confirm this SNAN check is not already implied by that condition. + if (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS)) + return true; + + return false; +} + // All 32-bit GPR operations implicitly zero the high-half of the corresponding // 64-bit GPR. 
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 1bf36559edd27..2c2ab6a329645 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -220,6 +220,10 @@ class AArch64TargetLowering : public TargetLowering { bool isProfitableToHoist(Instruction *I) const override; + bool isProfitableToCombineMinNumMaxNum(EVT VT, SDValue LHS, SDValue RHS, + const SDNodeFlags &Flags, + SelectionDAG &DAG) const override; + bool isZExtFree(Type *Ty1, Type *Ty2) const override; bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 013bc11d7b032..ac04c1383627d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5515,9 +5515,10 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; +// No pattern - expanded to fcmp+select for SNAN correctness. 
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm">; defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm">; defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; @@ -5565,29 +5566,13 @@ def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; - -def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)), - (FMINNMDrr FPR64:$a, FPR64:$b)>; -def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)), - (FMAXNMDrr FPR64:$a, FPR64:$b)>; + def : Pat<(f64 (fcanonicalize f64:$a)), (FMINNMDrr f64:$a, f64:$a)>; -def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)), - (FMINNMSrr FPR32:$a, FPR32:$b)>; -def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)), - (FMAXNMSrr FPR32:$a, FPR32:$b)>; def : Pat<(f32 (fcanonicalize f32:$a)), (FMINNMSrr f32:$a, f32:$a)>; let Predicates = [HasFullFP16] in { -def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)), - (FMINNMHrr FPR16:$a, FPR16:$b)>; -def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)), - (FMAXNMHrr FPR16:$a, FPR16:$b)>; def : Pat<(f16 (fcanonicalize f16:$a)), (FMINNMHrr f16:$a, f16:$a)>; }