diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ada25acc3636a..022c63037bb63 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -517,6 +517,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: case VPInstruction::AnyOf: + case VPInstruction::Not: return true; default: return false; @@ -569,7 +570,8 @@ Value *VPInstruction::generate(VPTransformState &State) { switch (getOpcode()) { case VPInstruction::Not: { - Value *A = State.get(getOperand(0)); + bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); + Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); return Builder.CreateNot(A, Name); } case Instruction::ExtractElement: { @@ -1120,6 +1122,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case Instruction::Select: case Instruction::Or: case Instruction::Freeze: + case VPInstruction::Not: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); case VPInstruction::ActiveLaneMask: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 77b92394a34a7..a2c6a21796e8f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -721,9 +721,9 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr [[NEXT_GEP]], i32 8, [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) -; PRED-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 -; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PRED-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true +; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -1380,9 +1380,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: [[PRED_STORE_CONTINUE27]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) -; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 ; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 6ff6bb48abeb9..bf72fea73d40b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -143,10 +143,10 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( zeroinitializer, ptr [[TMP34]], i32 8, [[TMP23]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) -; CHECK-NEXT: [[TMP36:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[TMP35:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP37:%.*]] = extractelement [[TMP36]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -262,10 +262,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP23]], ptr [[TMP38]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) -; CHECK-NEXT: [[TMP47:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[TMP39:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 -; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index 68467662111c3..30109973b91aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -64,10 +64,10 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002) -; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP29]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0 -; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 5de9d0e3fc93f..3475b951e54f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -75,8 +75,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP6]], true ; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index ea44fc35e1484..ab4ec848ad4c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -75,8 +75,7 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP6]], true ; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 3e417a0f8de2f..3cef1f6e03ff9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -126,9 +126,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP23]], ptr [[TMP26]], i32 1, [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 -; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-NEXT: [[TMP25:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP27:%.*]] = xor i1 [[TMP25]], true +; PRED-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -293,9 +293,9 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: [[PRED_STORE_CONTINUE2]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -482,9 +482,9 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: [[PRED_STORE_CONTINUE7]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) -; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0 ; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -673,9 +673,9 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: [[PRED_STORE_CONTINUE6]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0 ; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index a54a404ebbda6..bbc2e324941a1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -422,9 +422,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP15]], ptr [[TMP13]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]]) -; CHECK-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP31:%.*]] = xor i1 [[TMP30]], true +; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END_LOOPEXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 078f98f54525b..aed1c3d9fcc4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -87,8 +87,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON: pred.store.continue2: ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]]) -; TFCOMMON-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP18]], i32 0 +; TFCOMMON-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFCOMMON-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true ; TFCOMMON-NEXT: br i1 [[TMP17]], label [[END:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: end: ; TFCOMMON-NEXT: ret void @@ -153,9 +153,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]]) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[TMP27]], i64 [[TMP3]]) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP28]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP30]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor i1 [[TMP26]], true +; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: end: ; TFA_INTERLEAVE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 32a69f1af3c97..7028678b338f0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -64,8 +64,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFCOMMON-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFCOMMON-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true ; TFCOMMON-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void @@ -104,8 +104,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void @@ -196,8 +196,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; TFCOMMON-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFCOMMON-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true ; TFCOMMON-NEXT: br i1 [[TMP11]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void @@ -242,8 +242,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void @@ -353,8 +353,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; TFCOMMON-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFCOMMON-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true ; TFCOMMON-NEXT: br i1 [[TMP14]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void @@ -405,8 +405,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = xor i1 [[TMP30]], true ; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void @@ -625,8 +625,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFALWAYS-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFALWAYS-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true ; TFALWAYS-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: ; TFALWAYS-NEXT: ret void @@ -648,8 +648,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFFALLBACK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFFALLBACK-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true ; TFFALLBACK-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFFALLBACK: [[FOR_COND_CLEANUP]]: ; TFFALLBACK-NEXT: ret void @@ -688,8 +688,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void @@ -789,8 +789,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; TFALWAYS-NEXT: [[TMP12:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFALWAYS-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true ; TFALWAYS-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: ; TFALWAYS-NEXT: ret double [[TMP11]] @@ -819,8 +819,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; TFFALLBACK-NEXT: [[TMP12:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFFALLBACK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true ; TFFALLBACK-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFFALLBACK: [[FOR_COND_CLEANUP]]: ; TFFALLBACK-NEXT: ret double [[TMP11]] @@ -870,8 +870,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret double [[TMP24]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 4b3dd6f4e5a03..29c5119f0b98d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -529,10 +529,10 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]]) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; DEFAULT-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: @@ -597,10 +597,10 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]]) ; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; OPTSIZE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; OPTSIZE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; OPTSIZE-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: @@ -665,10 +665,10 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]]) ; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; MINSIZE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; MINSIZE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; MINSIZE-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 0f0713de5b289..c1725cf498b74 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -1433,8 +1433,8 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -1470,8 +1470,8 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -1507,8 +1507,8 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP18]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 +; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true ; CHECK-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 025a826c15c81..9133798b270fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -501,8 +501,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP10]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = extractelement [[TMP11]], i32 0 +; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = xor i1 [[TMP11]], true ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) @@ -529,8 +529,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP10]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = extractelement [[TMP11]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP11]], true ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) @@ -557,8 +557,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP9]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true ; CHECK-MAXBW-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 32a968ac43d86..29693785964d2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -235,9 +235,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP41]] = select [[ACTIVE_LANE_MASK]], [[TMP40]], [[VEC_PHI]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[IV]], i64 [[TMP10]]) -; PRED-NEXT: [[TMP42:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP43:%.*]] = extractelement [[TMP42]], i32 0 -; PRED-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-NEXT: [[TMP43:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP42:%.*]] = xor i1 [[TMP43]], true +; PRED-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP41]]) ; PRED-NEXT: br label %[[EXIT:.*]] @@ -458,9 +458,9 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP16]] = select [[ACTIVE_LANE_MASK]], [[TMP21]], [[VEC_PHI]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; PRED-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; PRED-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PRED-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true +; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP19:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16( [[TMP16]]) ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 17905e94d75cd..d906918a1a741 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -139,8 +139,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP8]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -388,8 +388,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP33]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP6]]) -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = extractelement [[TMP40]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = xor i1 [[TMP40]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -628,8 +628,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP15]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -861,8 +861,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP7]]) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] @@ -1079,8 +1079,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -1540,8 +1540,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = extractelement [[TMP54]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -1850,8 +1850,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = extractelement [[TMP54]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 4e89d94d01147..c6e10c50c6f44 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -95,9 +95,9 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( zeroinitializer, ptr [[TMP13]], i32 1, [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) -; PRED-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; PRED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP14]], true +; PRED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: @@ -233,8 +233,8 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0( [[TMP9]], ptr [[TMP5]], i32 1, [[ACTIVE_LANE_MASK]]), !alias.scope [[META7:![0-9]+]], !noalias [[META4]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000) -; PRED-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; PRED-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; PRED-NEXT: [[TMP12:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true ; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 40ad5bb3c9052..32235860dd9e2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -111,8 +111,8 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -241,8 +241,8 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP15]], i32 1, [[TMP12]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -375,8 +375,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP17]], i32 1, [[TMP14]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -535,8 +535,8 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index 49f98704c721f..e9643c974eb8f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -25,9 +25,9 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7) -; CHECK-NEXT: [[TMP9:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[TMP9]], i32 0 -; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[EXTRACT_FIRST_LANE_MASK:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[COND:%.*]] = xor i1 [[EXTRACT_FIRST_LANE_MASK]], true +; CHECK-NEXT: br i1 [[COND]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 10fe67dd52137..9636d0aaa43e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -64,9 +64,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index ed2c5cd56d380..9b725c3304549 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -24,9 +24,9 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP13]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP9]], true +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index 8ba5c879627d8..e17f69d4c3d22 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -31,8 +31,8 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP14]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) @@ -77,8 +77,8 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[VEC_PHI]], [[TMP14]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = xor i1 [[TMP18]], true ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -139,8 +139,8 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -183,8 +183,8 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true ; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -249,9 +249,9 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP16]], true +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP20]]) ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -306,8 +306,8 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[VEC_PHI]], [[TMP18]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = extractelement [[TMP22]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = xor i1 [[TMP22]], true ; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 01b864ba2c7a5..cf8cad2298c24 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -67,9 +67,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT11]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP65]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP68]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP72:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP76:%.*]] = extractelement [[TMP72]], i32 0 -; CHECK-NEXT: br i1 [[TMP76]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true +; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: @@ -174,9 +174,9 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP87]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP94:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP98:%.*]] = extractelement [[TMP94]], i32 0 -; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP66:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP67:%.*]] = xor i1 [[TMP66]], true +; CHECK-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index e996535b75f60..d4ffb0310fa2e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -28,9 +28,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: @@ -71,9 +71,9 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP6]], true +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: @@ -118,9 +118,9 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: @@ -177,9 +177,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP20]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) -; CHECK-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -229,8 +229,8 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -282,9 +282,9 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP14]], true +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: @@ -340,9 +340,9 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: @@ -401,9 +401,9 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: @@ -452,9 +452,9 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: @@ -507,9 +507,9 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = xor i1 [[TMP14]], true +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll index e55e3222bc5b0..87a6ce8461b94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -25,8 +25,8 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 4 ; CHECK-NEXT: [[NEXT_ACTIVE_LANE_MASK]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[IDX]], i64 [[N2]]) -; CHECK-NEXT: [[NOT_ACTIVE_LANE_MASK:%.*]] = xor <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], splat (i1 true) -; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = extractelement <4 x i1> [[NOT_ACTIVE_LANE_MASK]], i32 0 +; CHECK-NEXT: [[EXTRACT_FIRST_LANE_MASK:%.*]] = extractelement <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], i32 0 +; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = xor i1 [[EXTRACT_FIRST_LANE_MASK]], true ; CHECK-NEXT: br i1 [[FIRST_LANE_SET]], label %middle.block, label %vector.body entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 9736dde57385e..6afeb4836ffda 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -146,9 +146,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP10]], i32 4, [[ACTIVE_LANE_MASK]]) ; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) -; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 -; DATA_AND_CONTROL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA_AND_CONTROL-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; DATA_AND_CONTROL-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true +; DATA_AND_CONTROL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL: middle.block: ; DATA_AND_CONTROL-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_AND_CONTROL: scalar.ph: @@ -186,9 +186,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 720ea1f79c36d..3802845e82c82 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -75,8 +75,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP6]], true ; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll index 3ef37bc34bb1b..f2d556e2759b7 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll @@ -217,8 +217,7 @@ define float @fmaxnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -284,8 +283,7 @@ define float @fmaxnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -354,8 +352,7 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 10, [[TMP9]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP6]], true ; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[CMP_N]], [[TMP13]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -424,8 +421,7 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[START]], [[TMP9]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP6]], true ; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[CMP_N]], [[TMP13]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll index 0f688ab0dfb0b..95c6f2848037e 100644 --- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll @@ -217,8 +217,7 @@ define float @fminnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -284,8 +283,7 @@ define float @fminnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: