diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index aee9f0d36b3f0..23f9f4f5d06b2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8868,15 +8868,19 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, // First, implement comparison not propagating NaN. If no native fmin or fmax // available, use plain select with setcc instead. SDValue MinMax; - unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; + unsigned CompOpcIeee2008 = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; + unsigned CompOpcIeee2019Num = IsMax ? ISD::FMAXIMUMNUM : ISD::FMINIMUMNUM; unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM; // FIXME: We should probably define fminnum/fmaxnum variants with correct // signed zero behavior. bool MinMaxMustRespectOrderedZero = false; - if (isOperationLegalOrCustom(CompOpcIeee, VT)) { - MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags); + if (isOperationLegalOrCustom(CompOpcIeee2008, VT)) { + MinMax = DAG.getNode(CompOpcIeee2008, DL, VT, LHS, RHS, Flags); + MinMaxMustRespectOrderedZero = true; + } else if (isOperationLegalOrCustom(CompOpcIeee2019Num, VT)) { + MinMax = DAG.getNode(CompOpcIeee2019Num, DL, VT, LHS, RHS, Flags); MinMaxMustRespectOrderedZero = true; } else if (isOperationLegalOrCustom(CompOpc, VT)) { MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags); diff --git a/llvm/test/CodeGen/Hexagon/fminmax-v67.ll b/llvm/test/CodeGen/Hexagon/fminmax-v67.ll index 8ce34210c38cf..94810e32f0572 100644 --- a/llvm/test/CodeGen/Hexagon/fminmax-v67.ll +++ b/llvm/test/CodeGen/Hexagon/fminmax-v67.ll @@ -73,6 +73,45 @@ entry: ret float %0 } +; CHECK-LABEL: t1_2019 +; CHECK: dfmax +; CHECK: dfcmp.uo + +define dso_local double @t1_2019(double %a, double %b) local_unnamed_addr { +entry: + %0 = tail call double @llvm.maximum.f64(double %a, double %b) + ret double %0 +} + +; CHECK-LABEL: t2_2019 +; CHECK: dfmin +; CHECK: dfcmp.uo + +define dso_local double @t2_2019(double %a, double %b) local_unnamed_addr { +entry: + %0 = tail call double @llvm.minimum.f64(double %a, double %b) + ret double %0 +} + +; CHECK-LABEL: t3_2019 +; CHECK: sfmax +; CHECK: sfcmp.uo + +define dso_local float @t3_2019(float %a, float %b) local_unnamed_addr { +entry: + %0 = tail call float @llvm.maximum.f32(float %a, float %b) + ret float %0 +} + +; CHECK-LABEL: t4_2019 +; CHECK: sfmin +; CHECK: sfcmp.uo + +define dso_local float @t4_2019(float %a, float %b) local_unnamed_addr { +entry: + %0 = tail call float @llvm.minimum.f32(float %a, float %b) + ret float %0 +} declare double @llvm.minnum.f64(double, double) #1 declare double @llvm.maxnum.f64(double, double) #1 diff --git a/llvm/test/CodeGen/Hexagon/fminmax.ll b/llvm/test/CodeGen/Hexagon/fminmax.ll index 807b6815fa47e..350fd0fa79be8 100644 --- a/llvm/test/CodeGen/Hexagon/fminmax.ll +++ b/llvm/test/CodeGen/Hexagon/fminmax.ll @@ -51,6 +51,24 @@ entry: ret float %call } +; CHECK-LABEL: minimum +; CHECK: sfmin +; CHECK: sfcmp.uo +define float @minimum(float %x, float %y) #0 { +entry: + %call = tail call float @llvm.minimum.f32(float %x, float %y) #1 + ret float %call +} + +; CHECK-LABEL: maximum +; CHECK: sfmax +; CHECK: sfcmp.uo +define float @maximum(float %x, float %y) #0 { +entry: + %call = tail call float @llvm.maximum.f32(float %x, float %y) #1 + ret float %call +} + declare float @fminf(float, float) #0 declare float @fmaxf(float, float) #0 diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-sm60.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-sm60.ll index 7509cb53e424c..b701de1be8894 100644 --- a/llvm/test/CodeGen/NVPTX/atomicrmw-sm60.ll +++ b/llvm/test/CodeGen/NVPTX/atomicrmw-sm60.ll @@ -1836,30 +1836,24 @@ define float @fmax_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) define float @fminimum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) { ; SM60-LABEL: fminimum_acq_rel_float_global_cta( ; SM60: { -; SM60-NEXT: .reg .pred %p<6>; -; SM60-NEXT: .reg .b32 %r<9>; +; SM60-NEXT: .reg .pred %p<3>; +; SM60-NEXT: .reg .b32 %r<6>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b32 %r2, [fminimum_acq_rel_float_global_cta_param_1]; ; SM60-NEXT: ld.param.b64 %rd1, [fminimum_acq_rel_float_global_cta_param_0]; ; SM60-NEXT: membar.cta; -; SM60-NEXT: ld.global.b32 %r8, [%rd1]; -; SM60-NEXT: setp.eq.b32 %p3, %r2, -2147483648; +; SM60-NEXT: ld.global.b32 %r5, [%rd1]; ; SM60-NEXT: $L__BB64_1: // %atomicrmw.start ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: setp.nan.f32 %p1, %r8, %r2; -; SM60-NEXT: min.f32 %r3, %r8, %r2; +; SM60-NEXT: setp.nan.f32 %p1, %r5, %r2; +; SM60-NEXT: min.f32 %r3, %r5, %r2; ; SM60-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; SM60-NEXT: setp.eq.b32 %p2, %r8, -2147483648; -; SM60-NEXT: selp.f32 %r5, %r8, %r4, %p2; -; SM60-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; SM60-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; SM60-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; SM60-NEXT: atom.cta.global.cas.b32 %r1, [%rd1], %r8, %r7; -; SM60-NEXT: setp.ne.b32 %p5, %r1, %r8; -; SM60-NEXT: mov.b32 %r8, %r1; -; SM60-NEXT: @%p5 bra $L__BB64_1; +; SM60-NEXT: atom.cta.global.cas.b32 %r1, [%rd1], %r5, %r4; +; SM60-NEXT: setp.ne.b32 %p2, %r1, %r5; +; SM60-NEXT: mov.b32 %r5, %r1; +; SM60-NEXT: @%p2 bra $L__BB64_1; ; SM60-NEXT: // %bb.2: // %atomicrmw.end ; SM60-NEXT: membar.cta; ; SM60-NEXT: st.param.b32 [func_retval0], %r1; @@ -1871,30 +1865,24 @@ define float @fminimum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %v define float @fmaximum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) { ; SM60-LABEL: fmaximum_acq_rel_float_global_cta( ; SM60: { -; SM60-NEXT: .reg .pred %p<6>; -; SM60-NEXT: .reg .b32 %r<9>; +; SM60-NEXT: .reg .pred %p<3>; +; SM60-NEXT: .reg .b32 %r<6>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b32 %r2, [fmaximum_acq_rel_float_global_cta_param_1]; ; SM60-NEXT: ld.param.b64 %rd1, [fmaximum_acq_rel_float_global_cta_param_0]; ; SM60-NEXT: membar.cta; -; SM60-NEXT: ld.global.b32 %r8, [%rd1]; -; SM60-NEXT: setp.eq.b32 %p3, %r2, 0; +; SM60-NEXT: ld.global.b32 %r5, [%rd1]; ; SM60-NEXT: $L__BB65_1: // %atomicrmw.start ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: setp.nan.f32 %p1, %r8, %r2; -; SM60-NEXT: max.f32 %r3, %r8, %r2; +; SM60-NEXT: setp.nan.f32 %p1, %r5, %r2; +; SM60-NEXT: max.f32 %r3, %r5, %r2; ; SM60-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; SM60-NEXT: setp.eq.b32 %p2, %r8, 0; -; SM60-NEXT: selp.f32 %r5, %r8, %r4, %p2; -; SM60-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; SM60-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; SM60-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; SM60-NEXT: atom.cta.global.cas.b32 %r1, [%rd1], %r8, %r7; -; SM60-NEXT: setp.ne.b32 %p5, %r1, %r8; -; SM60-NEXT: mov.b32 %r8, %r1; -; SM60-NEXT: @%p5 bra $L__BB65_1; +; SM60-NEXT: atom.cta.global.cas.b32 %r1, [%rd1], %r5, %r4; +; SM60-NEXT: setp.ne.b32 %p2, %r1, %r5; +; SM60-NEXT: mov.b32 %r5, %r1; +; SM60-NEXT: @%p2 bra $L__BB65_1; ; SM60-NEXT: // %bb.2: // %atomicrmw.end ; SM60-NEXT: membar.cta; ; SM60-NEXT: st.param.b32 [func_retval0], %r1; @@ -1999,29 +1987,23 @@ define double @fmax_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %va define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM60-LABEL: fminimum_acq_rel_double_global_cta( ; SM60: { -; SM60-NEXT: .reg .pred %p<6>; -; SM60-NEXT: .reg .b64 %rd<10>; +; SM60-NEXT: .reg .pred %p<3>; +; SM60-NEXT: .reg .b64 %rd<7>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd3, [fminimum_acq_rel_double_global_cta_param_1]; ; SM60-NEXT: ld.param.b64 %rd2, [fminimum_acq_rel_double_global_cta_param_0]; ; SM60-NEXT: membar.cta; -; SM60-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM60-NEXT: setp.eq.b64 %p3, %rd3, -9223372036854775808; +; SM60-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM60-NEXT: $L__BB70_1: // %atomicrmw.start ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM60-NEXT: min.f64 %rd4, %rd9, %rd3; +; SM60-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM60-NEXT: min.f64 %rd4, %rd6, %rd3; ; SM60-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM60-NEXT: setp.eq.b64 %p2, %rd9, -9223372036854775808; -; SM60-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM60-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM60-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM60-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM60-NEXT: atom.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM60-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM60-NEXT: mov.b64 %rd9, %rd1; -; SM60-NEXT: @%p5 bra $L__BB70_1; +; SM60-NEXT: atom.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM60-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM60-NEXT: mov.b64 %rd6, %rd1; +; SM60-NEXT: @%p2 bra $L__BB70_1; ; SM60-NEXT: // %bb.2: // %atomicrmw.end ; SM60-NEXT: membar.cta; ; SM60-NEXT: st.param.b64 [func_retval0], %rd1; @@ -2033,29 +2015,23 @@ define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double define double @fmaximum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM60-LABEL: fmaximum_acq_rel_double_global_cta( ; SM60: { -; SM60-NEXT: .reg .pred %p<6>; -; SM60-NEXT: .reg .b64 %rd<10>; +; SM60-NEXT: .reg .pred %p<3>; +; SM60-NEXT: .reg .b64 %rd<7>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd3, [fmaximum_acq_rel_double_global_cta_param_1]; ; SM60-NEXT: ld.param.b64 %rd2, [fmaximum_acq_rel_double_global_cta_param_0]; ; SM60-NEXT: membar.cta; -; SM60-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM60-NEXT: setp.eq.b64 %p3, %rd3, 0; +; SM60-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM60-NEXT: $L__BB71_1: // %atomicrmw.start ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM60-NEXT: max.f64 %rd4, %rd9, %rd3; +; SM60-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM60-NEXT: max.f64 %rd4, %rd6, %rd3; ; SM60-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM60-NEXT: setp.eq.b64 %p2, %rd9, 0; -; SM60-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM60-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM60-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM60-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM60-NEXT: atom.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM60-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM60-NEXT: mov.b64 %rd9, %rd1; -; SM60-NEXT: @%p5 bra $L__BB71_1; +; SM60-NEXT: atom.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM60-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM60-NEXT: mov.b64 %rd6, %rd1; +; SM60-NEXT: @%p2 bra $L__BB71_1; ; SM60-NEXT: // %bb.2: // %atomicrmw.end ; SM60-NEXT: membar.cta; ; SM60-NEXT: st.param.b64 [func_retval0], %rd1; diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-sm70.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-sm70.ll index ec058567e9ec7..2ae21abe9fb2c 100644 --- a/llvm/test/CodeGen/NVPTX/atomicrmw-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/atomicrmw-sm70.ll @@ -1836,30 +1836,24 @@ define float @fmax_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) define float @fminimum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) { ; SM70-LABEL: fminimum_acq_rel_float_global_cta( ; SM70: { -; SM70-NEXT: .reg .pred %p<6>; -; SM70-NEXT: .reg .b32 %r<9>; +; SM70-NEXT: .reg .pred %p<3>; +; SM70-NEXT: .reg .b32 %r<6>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b32 %r2, [fminimum_acq_rel_float_global_cta_param_1]; ; SM70-NEXT: ld.param.b64 %rd1, [fminimum_acq_rel_float_global_cta_param_0]; ; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ld.global.b32 %r8, [%rd1]; -; SM70-NEXT: setp.eq.b32 %p3, %r2, -2147483648; +; SM70-NEXT: ld.global.b32 %r5, [%rd1]; ; SM70-NEXT: $L__BB64_1: // %atomicrmw.start ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: setp.nan.f32 %p1, %r8, %r2; -; SM70-NEXT: min.f32 %r3, %r8, %r2; +; SM70-NEXT: setp.nan.f32 %p1, %r5, %r2; +; SM70-NEXT: min.f32 %r3, %r5, %r2; ; SM70-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; SM70-NEXT: setp.eq.b32 %p2, %r8, -2147483648; -; SM70-NEXT: selp.f32 %r5, %r8, %r4, %p2; -; SM70-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; SM70-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; SM70-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r1, [%rd1], %r8, %r7; -; SM70-NEXT: setp.ne.b32 %p5, %r1, %r8; -; SM70-NEXT: mov.b32 %r8, %r1; -; SM70-NEXT: @%p5 bra $L__BB64_1; +; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r1, [%rd1], %r5, %r4; +; SM70-NEXT: setp.ne.b32 %p2, %r1, %r5; +; SM70-NEXT: mov.b32 %r5, %r1; +; SM70-NEXT: @%p2 bra $L__BB64_1; ; SM70-NEXT: // %bb.2: // %atomicrmw.end ; SM70-NEXT: fence.acq_rel.cta; ; SM70-NEXT: st.param.b32 [func_retval0], %r1; @@ -1871,30 +1865,24 @@ define float @fminimum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %v define float @fmaximum_acq_rel_float_global_cta(ptr addrspace(1) %addr, float %val) { ; SM70-LABEL: fmaximum_acq_rel_float_global_cta( ; SM70: { -; SM70-NEXT: .reg .pred %p<6>; -; SM70-NEXT: .reg .b32 %r<9>; +; SM70-NEXT: .reg .pred %p<3>; +; SM70-NEXT: .reg .b32 %r<6>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b32 %r2, [fmaximum_acq_rel_float_global_cta_param_1]; ; SM70-NEXT: ld.param.b64 %rd1, [fmaximum_acq_rel_float_global_cta_param_0]; ; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ld.global.b32 %r8, [%rd1]; -; SM70-NEXT: setp.eq.b32 %p3, %r2, 0; +; SM70-NEXT: ld.global.b32 %r5, [%rd1]; ; SM70-NEXT: $L__BB65_1: // %atomicrmw.start ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: setp.nan.f32 %p1, %r8, %r2; -; SM70-NEXT: max.f32 %r3, %r8, %r2; +; SM70-NEXT: setp.nan.f32 %p1, %r5, %r2; +; SM70-NEXT: max.f32 %r3, %r5, %r2; ; SM70-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; SM70-NEXT: setp.eq.b32 %p2, %r8, 0; -; SM70-NEXT: selp.f32 %r5, %r8, %r4, %p2; -; SM70-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; SM70-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; SM70-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r1, [%rd1], %r8, %r7; -; SM70-NEXT: setp.ne.b32 %p5, %r1, %r8; -; SM70-NEXT: mov.b32 %r8, %r1; -; SM70-NEXT: @%p5 bra $L__BB65_1; +; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r1, [%rd1], %r5, %r4; +; SM70-NEXT: setp.ne.b32 %p2, %r1, %r5; +; SM70-NEXT: mov.b32 %r5, %r1; +; SM70-NEXT: @%p2 bra $L__BB65_1; ; SM70-NEXT: // %bb.2: // %atomicrmw.end ; SM70-NEXT: fence.acq_rel.cta; ; SM70-NEXT: st.param.b32 [func_retval0], %r1; @@ -1999,29 +1987,23 @@ define double @fmax_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %va define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM70-LABEL: fminimum_acq_rel_double_global_cta( ; SM70: { -; SM70-NEXT: .reg .pred %p<6>; -; SM70-NEXT: .reg .b64 %rd<10>; +; SM70-NEXT: .reg .pred %p<3>; +; SM70-NEXT: .reg .b64 %rd<7>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd3, [fminimum_acq_rel_double_global_cta_param_1]; ; SM70-NEXT: ld.param.b64 %rd2, [fminimum_acq_rel_double_global_cta_param_0]; ; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM70-NEXT: setp.eq.b64 %p3, %rd3, -9223372036854775808; +; SM70-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM70-NEXT: $L__BB70_1: // %atomicrmw.start ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM70-NEXT: min.f64 %rd4, %rd9, %rd3; +; SM70-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM70-NEXT: min.f64 %rd4, %rd6, %rd3; ; SM70-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM70-NEXT: setp.eq.b64 %p2, %rd9, -9223372036854775808; -; SM70-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM70-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM70-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM70-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM70-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM70-NEXT: mov.b64 %rd9, %rd1; -; SM70-NEXT: @%p5 bra $L__BB70_1; +; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM70-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM70-NEXT: mov.b64 %rd6, %rd1; +; SM70-NEXT: @%p2 bra $L__BB70_1; ; SM70-NEXT: // %bb.2: // %atomicrmw.end ; SM70-NEXT: fence.acq_rel.cta; ; SM70-NEXT: st.param.b64 [func_retval0], %rd1; @@ -2033,29 +2015,23 @@ define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double define double @fmaximum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM70-LABEL: fmaximum_acq_rel_double_global_cta( ; SM70: { -; SM70-NEXT: .reg .pred %p<6>; -; SM70-NEXT: .reg .b64 %rd<10>; +; SM70-NEXT: .reg .pred %p<3>; +; SM70-NEXT: .reg .b64 %rd<7>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd3, [fmaximum_acq_rel_double_global_cta_param_1]; ; SM70-NEXT: ld.param.b64 %rd2, [fmaximum_acq_rel_double_global_cta_param_0]; ; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM70-NEXT: setp.eq.b64 %p3, %rd3, 0; +; SM70-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM70-NEXT: $L__BB71_1: // %atomicrmw.start ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM70-NEXT: max.f64 %rd4, %rd9, %rd3; +; SM70-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM70-NEXT: max.f64 %rd4, %rd6, %rd3; ; SM70-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM70-NEXT: setp.eq.b64 %p2, %rd9, 0; -; SM70-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM70-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM70-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM70-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM70-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM70-NEXT: mov.b64 %rd9, %rd1; -; SM70-NEXT: @%p5 bra $L__BB71_1; +; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM70-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM70-NEXT: mov.b64 %rd6, %rd1; +; SM70-NEXT: @%p2 bra $L__BB71_1; ; SM70-NEXT: // %bb.2: // %atomicrmw.end ; SM70-NEXT: fence.acq_rel.cta; ; SM70-NEXT: st.param.b64 [func_retval0], %rd1; diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-sm90.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-sm90.ll index bc918023b2658..d4f81c2436318 100644 --- a/llvm/test/CodeGen/NVPTX/atomicrmw-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/atomicrmw-sm90.ll @@ -1983,29 +1983,23 @@ define double @fmax_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %va define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM90-LABEL: fminimum_acq_rel_double_global_cta( ; SM90: { -; SM90-NEXT: .reg .pred %p<6>; -; SM90-NEXT: .reg .b64 %rd<10>; +; SM90-NEXT: .reg .pred %p<3>; +; SM90-NEXT: .reg .b64 %rd<7>; ; SM90-EMPTY: ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd3, [fminimum_acq_rel_double_global_cta_param_1]; ; SM90-NEXT: ld.param.b64 %rd2, [fminimum_acq_rel_double_global_cta_param_0]; ; SM90-NEXT: fence.release.cta; -; SM90-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM90-NEXT: setp.eq.b64 %p3, %rd3, -9223372036854775808; +; SM90-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM90-NEXT: $L__BB70_1: // %atomicrmw.start ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 -; SM90-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM90-NEXT: min.f64 %rd4, %rd9, %rd3; +; SM90-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM90-NEXT: min.f64 %rd4, %rd6, %rd3; ; SM90-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM90-NEXT: setp.eq.b64 %p2, %rd9, -9223372036854775808; -; SM90-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM90-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM90-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM90-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM90-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM90-NEXT: mov.b64 %rd9, %rd1; -; SM90-NEXT: @%p5 bra $L__BB70_1; +; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM90-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM90-NEXT: mov.b64 %rd6, %rd1; +; SM90-NEXT: @%p2 bra $L__BB70_1; ; SM90-NEXT: // %bb.2: // %atomicrmw.end ; SM90-NEXT: fence.acquire.cta; ; SM90-NEXT: st.param.b64 [func_retval0], %rd1; @@ -2017,29 +2011,23 @@ define double @fminimum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double define double @fmaximum_acq_rel_double_global_cta(ptr addrspace(1) %addr, double %val) { ; SM90-LABEL: fmaximum_acq_rel_double_global_cta( ; SM90: { -; SM90-NEXT: .reg .pred %p<6>; -; SM90-NEXT: .reg .b64 %rd<10>; +; SM90-NEXT: .reg .pred %p<3>; +; SM90-NEXT: .reg .b64 %rd<7>; ; SM90-EMPTY: ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd3, [fmaximum_acq_rel_double_global_cta_param_1]; ; SM90-NEXT: ld.param.b64 %rd2, [fmaximum_acq_rel_double_global_cta_param_0]; ; SM90-NEXT: fence.release.cta; -; SM90-NEXT: ld.global.b64 %rd9, [%rd2]; -; SM90-NEXT: setp.eq.b64 %p3, %rd3, 0; +; SM90-NEXT: ld.global.b64 %rd6, [%rd2]; ; SM90-NEXT: $L__BB71_1: // %atomicrmw.start ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 -; SM90-NEXT: setp.nan.f64 %p1, %rd9, %rd3; -; SM90-NEXT: max.f64 %rd4, %rd9, %rd3; +; SM90-NEXT: setp.nan.f64 %p1, %rd6, %rd3; +; SM90-NEXT: max.f64 %rd4, %rd6, %rd3; ; SM90-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p1; -; SM90-NEXT: setp.eq.b64 %p2, %rd9, 0; -; SM90-NEXT: selp.f64 %rd6, %rd9, %rd5, %p2; -; SM90-NEXT: selp.f64 %rd7, %rd3, %rd6, %p3; -; SM90-NEXT: setp.eq.f64 %p4, %rd5, 0d0000000000000000; -; SM90-NEXT: selp.f64 %rd8, %rd7, %rd5, %p4; -; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd9, %rd8; -; SM90-NEXT: setp.ne.b64 %p5, %rd1, %rd9; -; SM90-NEXT: mov.b64 %rd9, %rd1; -; SM90-NEXT: @%p5 bra $L__BB71_1; +; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd1, [%rd2], %rd6, %rd5; +; SM90-NEXT: setp.ne.b64 %p2, %rd1, %rd6; +; SM90-NEXT: mov.b64 %rd6, %rd1; +; SM90-NEXT: @%p2 bra $L__BB71_1; ; SM90-NEXT: // %bb.2: // %atomicrmw.end ; SM90-NEXT: fence.acquire.cta; ; SM90-NEXT: st.param.b64 [func_retval0], %rd1; diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index 1ed296269c521..de7d55dc7f75e 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -681,8 +681,8 @@ define half @minimum_half(half %a, half %b) { define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-LABEL: minimum_float( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_param_0]; @@ -690,13 +690,7 @@ define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float( @@ -727,19 +721,15 @@ define float @minimum_float(float %a, float %b) { define float @minimum_imm1(float %a) { ; CHECK-NOF16-LABEL: minimum_imm1( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<4>; -; CHECK-NOF16-NEXT: .reg .b32 %r<6>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm1_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; -; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_imm1( @@ -768,19 +758,15 @@ define float @minimum_imm1(float %a) { define float @minimum_imm2(float %a) { ; CHECK-NOF16-LABEL: minimum_imm2( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<4>; -; CHECK-NOF16-NEXT: .reg .b32 %r<6>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm2_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; -; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_imm2( @@ -809,8 +795,8 @@ define float @minimum_imm2(float %a) { define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-LABEL: minimum_float_ftz( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0]; @@ -818,13 +804,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float_ftz( @@ -855,8 +835,8 @@ define float @minimum_float_ftz(float %a, float %b) #1 { define double @minimum_double(double %a, double %b) { ; CHECK-LABEL: minimum_double( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<5>; -; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [minimum_double_param_0]; @@ -864,13 +844,7 @@ define double @minimum_double(double %a, double %b) { ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.b64 %p2, %rd1, -9223372036854775808; -; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.b64 %p3, %rd2, -9223372036854775808; -; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; -; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; -; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %x = call double @llvm.minimum.f64(double %a, double %b) ret double %x @@ -1212,17 +1186,15 @@ define half @maximum_half(half %a, half %b) { define float @maximum_imm1(float %a) { ; CHECK-NOF16-LABEL: maximum_imm1( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<3>; -; CHECK-NOF16-NEXT: .reg .b32 %r<5>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm1( @@ -1251,17 +1223,15 @@ define float @maximum_imm1(float %a) { define float @maximum_imm2(float %a) { ; CHECK-NOF16-LABEL: maximum_imm2( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<3>; -; CHECK-NOF16-NEXT: .reg .b32 %r<5>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm2( @@ -1290,8 +1260,8 @@ define float @maximum_imm2(float %a) { define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-LABEL: maximum_float( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_param_0]; @@ -1299,13 +1269,7 @@ define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float( @@ -1336,8 +1300,8 @@ define float @maximum_float(float %a, float %b) { define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-LABEL: maximum_float_ftz( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<2>; +; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; @@ -1345,13 +1309,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float_ftz( @@ -1382,8 +1340,8 @@ define float @maximum_float_ftz(float %a, float %b) #1 { define double @maximum_double(double %a, double %b) { ; CHECK-LABEL: maximum_double( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<5>; -; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [maximum_double_param_0]; @@ -1391,13 +1349,7 @@ define double @maximum_double(double %a, double %b) { ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.b64 %p2, %rd1, 0; -; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.b64 %p3, %rd2, 0; -; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; -; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; -; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %x = call double @llvm.maximum.f64(double %a, double %b) ret double %x