diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 70b174509fc47..41ee4aede3122 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -290,8 +290,11 @@ struct Recipe_match { if ((!matchRecipeAndOpcode(R) && ...)) return false; - if (R->getNumOperands() != std::tuple_size::value) { - assert(Opcode == Instruction::PHI && + if (R->getNumOperands() != std::tuple_size_v) { + auto *RepR = dyn_cast(R); + assert((Opcode == Instruction::PHI || + (RepR && std::tuple_size_v == + RepR->getNumOperands() - RepR->isPredicated())) && "non-variadic recipe with matched opcode does not have the " "expected number of operands"); return false; @@ -564,6 +567,12 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_c_Binary(Op0, Op1); } +template +inline AllRecipe_match +m_UDiv(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary(Op0, Op1); +} + /// Match a binary AND operation. template inline AllRecipe_commutative_match diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b1880517a4199..d26b56d21ef03 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1342,6 +1342,19 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith( Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0)); + const APInt *APC; + if (match(Def, m_c_Mul(m_VPValue(A), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::Shl, + {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, + *cast(Def), Def->getDebugLoc())); + + if (match(Def, m_UDiv(m_VPValue(A), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::LShr, + {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, {}, + Def->getDebugLoc())); + if (match(Def, m_Not(m_VPValue(A)))) { if (match(A, m_Not(m_VPValue(A)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index af9262f3ca4d1..32849cfbbd636 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -188,6 +188,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { case Instruction::Trunc: case Instruction::ZExt: case Instruction::Mul: + case Instruction::Shl: case Instruction::FMul: case VPInstruction::Broadcast: // Opcodes above can only use EVL after wide inductions have been diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index ac8095ae5c3e7..077bde70a537b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 4f2933ad2f85c..d7d77cb4325d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4 -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4 +; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2 +; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]] ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8 @@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP8]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp [[BROADCAST_SPLAT]] to -; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2 +; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1 ; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3 ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]] ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]] @@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] @@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; DEFAULT-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[Y]], i64 0 @@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 ; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2b294696ebe89..99bbcad95b6de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] @@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] @@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 14c53cd89c922..74598e2063e48 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 26a9545764091..d18283f831799 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SVE: vector.ph: ; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4cacc30a714b6..aa6e4df26d71b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8 -; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2 +; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3 +; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT: [[VEC_EPILOG_PH]]: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4 +; DEFAULT-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2 ; DEFAULT-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]] ; DEFAULT-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 +; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index c2224858049b7..bdc36ee44559e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -227,8 +227,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: ; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 16 -; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP10]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP6]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP10]], 2 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[B:%.*]], i64 0 @@ -239,7 +239,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-VLA: vector.body: ; INTERLEAVE-4-VLA-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = mul nuw nsw i64 [[TMP10]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP10]], 1 ; INTERLEAVE-4-VLA-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP10]], 3 ; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP10]] ; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP13]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index 040acb494a42e..fff9365baccb4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: ; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3 ; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]] ; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll index cc3b1c9c9db8a..de5a24666626c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll @@ -16,7 +16,7 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll b/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll index 116c017a4bd73..2b0684f3cda01 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll @@ -73,8 +73,8 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP12]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -88,7 +88,7 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET]], [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP11]], 1 ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw nsw i64 [[TMP11]], 3 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP17]] @@ -122,7 +122,7 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP31]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], 8 +; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 3 ; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP0]], [[TMP33]] ; CHECK-NEXT: [[N_VEC11:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF10]] ; CHECK-NEXT: [[TMP34:%.*]] = insertelement zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index cf45f3a88f37e..c340cfc9ad6cc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -67,7 +67,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS1: [[VECTOR_PH]]: ; CHECK-VS1-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16 +; CHECK-VS1-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4 ; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 @@ -160,7 +160,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS2: [[VECTOR_PH]]: ; CHECK-VS2-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +; CHECK-VS2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3 ; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 @@ -393,7 +393,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 0a11e8e4390cb..00c7e6eecfb2c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -15,7 +15,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -50,7 +50,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -72,8 +72,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -130,7 +130,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -173,7 +173,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -198,8 +198,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -273,7 +273,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -321,7 +321,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -349,8 +349,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -433,7 +433,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -485,7 +485,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: ; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFFALLBACK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -557,7 +557,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -592,7 +592,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFALWAYS-NEXT: [[ENTRY:.*]]: ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFALWAYS-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -614,7 +614,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFFALLBACK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -636,8 +636,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -696,7 +696,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 @@ -741,7 +741,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFALWAYS-NEXT: [[ENTRY:.*]]: ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFALWAYS-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] @@ -770,7 +770,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFFALLBACK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -799,8 +799,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll index fe7f43f7f4b02..3887322cb95b1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll @@ -49,7 +49,7 @@ define void @wombat(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %a ; CHECK-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[ARG6]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index bdbf08aecf6b3..b3d49f3b8b168 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -440,7 +440,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] @@ -490,7 +490,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: br label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; OPTSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; OPTSIZE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; OPTSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; OPTSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; OPTSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] @@ -540,7 +540,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: br label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; MINSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; MINSIZE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; MINSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; MINSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; MINSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 5f8d7e7d24cc4..33f8794d38d61 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -15,7 +15,7 @@ define void @foo() { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index dd0107b8c4bff..f90b26013fcbc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -58,7 +58,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -100,7 +100,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -211,7 +211,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -252,7 +252,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -363,7 +363,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -405,7 +405,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -520,7 +520,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -563,7 +563,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -680,7 +680,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -723,7 +723,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -844,7 +844,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -889,7 +889,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1008,7 +1008,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1048,7 +1048,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1152,7 +1152,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1188,7 +1188,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1289,7 +1289,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE: vector.ph: ; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1329,7 +1329,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-SVE-MAXBW: vector.ph: ; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll index b033f6051f812..ba00cb0c543fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll @@ -156,7 +156,7 @@ define i32 @red_zext_mul_by_256(ptr %start, ptr %end) { ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 256) +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -290,7 +290,7 @@ define i32 @red_sext_mul_by_128(ptr %start, ptr %end) { ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 128) +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 7) ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -423,7 +423,7 @@ define i32 @red_sext_mul_by_256(ptr %start, ptr %end) { ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 256) +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index fed979e34e5d5..5c2c67337625a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -11,7 +11,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -85,7 +85,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]] @@ -93,7 +93,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ] @@ -116,7 +116,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]] ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] @@ -140,7 +140,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0 ; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[ACCUM1]], -1 ; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY1]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret void @@ -419,7 +419,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 916bda1e245f7..dc7ed20ac0927 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -12,8 +12,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -112,8 +112,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 5be1fac8c6287..7db519ffceb98 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -13,7 +13,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -43,8 +43,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -84,7 +84,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -878,7 +878,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -910,8 +910,8 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -945,7 +945,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1003,7 +1003,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP14]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1069,8 +1069,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP13]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP16]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP13]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP16]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP14]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1176,7 +1176,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1295,7 +1295,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4 ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -1333,7 +1333,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -1371,7 +1371,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4 ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -1432,7 +1432,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1466,8 +1466,8 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1511,7 +1511,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1572,7 +1572,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1606,8 +1606,8 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP6]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1651,7 +1651,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2145,7 +2145,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 @@ -2186,8 +2186,8 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP3]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP3]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 @@ -2238,7 +2238,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll index 2060168c531fb..e37f5229f0a16 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll @@ -175,7 +175,7 @@ define void @chained_sext_adds(ptr noalias %src, ptr noalias %dst) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll index 39d6cac453012..02ba48fb0371c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll @@ -13,7 +13,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -44,8 +44,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -87,7 +87,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br label [[ENTRY:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 04a2b65a9dcea..553d942d40d26 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -15,7 +15,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -41,8 +41,8 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -74,7 +74,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -208,7 +208,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -234,8 +234,8 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -267,7 +267,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -313,7 +313,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -339,8 +339,8 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -372,7 +372,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -519,7 +519,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025) ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVE1: vector.body: @@ -548,7 +548,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025) ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVED: vector.body: @@ -577,7 +577,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025) ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-MAXBW: vector.body: @@ -708,7 +708,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -734,8 +734,8 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -746,7 +746,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP4]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP7]] @@ -781,7 +781,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -827,7 +827,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -853,8 +853,8 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -886,7 +886,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] @@ -937,7 +937,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP7]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP7]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]] @@ -972,8 +972,8 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP13]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP16]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = shl nuw i32 [[TMP13]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP16]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]] @@ -1014,7 +1014,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]] @@ -1068,7 +1068,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVE1: vector.ph: ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP3]], 16 +; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP3]], 4 ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP9]] ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]] @@ -1103,8 +1103,8 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP3]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP8]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP3]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = shl nuw i32 [[TMP8]], 1 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP12]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]] @@ -1145,7 +1145,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4 ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index fd560de78a6d0..356bf21487051 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -12,7 +12,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() @@ -56,10 +56,10 @@ define void @test_invar_gep(ptr %dst) #0 { ; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IC2: vector.ph: ; IC2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; IC2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP2]], 4 +; IC2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP2]], 2 ; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; IC2-NEXT: [[TMP21:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IC2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP11]], 2 +; IC2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP11]], 1 ; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; IC2-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; IC2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() @@ -120,7 +120,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[START]], [[N_VEC]] @@ -169,10 +169,10 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 { ; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IC2: vector.ph: ; IC2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; IC2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; IC2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 ; IC2-NEXT: [[TMP9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IC2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 +; IC2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1 ; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]] ; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; IC2-NEXT: [[TMP6:%.*]] = add i64 [[START]], [[N_VEC]] @@ -238,7 +238,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2 @@ -248,7 +248,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = mul nsw [[TMP10]], splat (i64 2) ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw [[DOTSPLAT]], [[TMP18]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i64 2, [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = shl nsw i64 [[TMP6]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -291,15 +291,15 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 { ; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IC2: vector.ph: ; IC2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IC2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; IC2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; IC2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IC2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +; IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1 ; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP7]] ; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; IC2-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 2 ; IC2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[TMP10]] -; IC2-NEXT: [[TMP13:%.*]] = mul [[BROADCAST_SPLAT1]], splat (i64 2) +; IC2-NEXT: [[TMP13:%.*]] = shl [[BROADCAST_SPLAT1]], splat (i64 1) ; IC2-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv4i64() ; IC2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[START]], i64 0 ; IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 8e8159adf133e..98ed82122dfc1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -56,7 +56,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VSCALEFORTUNING2: [[VECTOR_PH]]: ; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -179,7 +179,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; PRED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() @@ -294,8 +294,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4 -; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2 +; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP3]], 2 +; DEFAULT-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP5]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 @@ -354,8 +354,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; VSCALEFORTUNING2: [[VECTOR_PH]]: ; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 4 -; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP7]], 2 +; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP3]], 2 +; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP7]], 1 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 @@ -388,7 +388,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP18]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 2 +; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 1 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], [[TMP20]] ; VSCALEFORTUNING2-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] ; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = insertelement zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0 @@ -434,7 +434,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8 +; PRED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3 ; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index dd6f0fe5f1292..d41a737bf3610 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -21,7 +21,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 977713d389b4e..46c2051ce916d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -11,7 +11,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -93,7 +93,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index fbac263819e4b..e3188b1c08c38 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -39,7 +39,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -82,7 +82,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -121,7 +121,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] @@ -195,8 +195,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -207,7 +207,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -256,8 +256,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -265,7 +265,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -307,8 +307,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] @@ -336,7 +336,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] @@ -440,7 +440,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 @@ -508,7 +508,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 @@ -569,7 +569,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP6]] @@ -679,7 +679,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -736,7 +736,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -789,7 +789,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] @@ -884,7 +884,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -939,7 +939,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -990,7 +990,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] @@ -1084,7 +1084,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1219,8 +1219,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1231,7 +1231,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -1290,8 +1290,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1299,7 +1299,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -1355,8 +1355,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] @@ -1384,7 +1384,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] @@ -1488,8 +1488,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1500,7 +1500,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -1559,8 +1559,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-ORDERED: vector.ph: ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1568,7 +1568,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2 +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3 ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] ; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] @@ -1624,8 +1624,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] @@ -1653,7 +1653,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll index 9a831690d632d..695ea713ed125 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll @@ -12,7 +12,7 @@ define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 1024, [[TMP3]] @@ -71,7 +71,7 @@ define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 1024, [[TMP3]] @@ -148,7 +148,7 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt ; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 1024, [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 42724d693b52d..c2eadc100e822 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -19,7 +19,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]] @@ -232,7 +232,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[INDEX1:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index cc03ef8af8b00..50ea180cd9798 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -19,8 +19,8 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]] ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]] @@ -29,7 +29,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP27:%.*]] = mul nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP4]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP4]], 3 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP27]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index d3edad4010bb8..2615d0ad28411 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -20,8 +20,8 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: ; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16 -; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4 +; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]] ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] @@ -43,7 +43,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT: vec.epilog.ph: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 +; DEFAULT-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2 ; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]] ; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]] ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -76,7 +76,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: br label [[VECTOR_PH:%.*]] ; PRED: vector.ph: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; PRED-NEXT: [[TMP4:%.*]] = sub i64 101, [[TMP3]] @@ -133,8 +133,8 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: ; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP2]], 16 -; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP16]], 2 +; DEFAULT-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP2]], 4 +; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP16]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 @@ -211,7 +211,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PRED: vector.ph: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1000) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index 78bd554210c6b..b9bbc272fc63a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -16,8 +16,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index 6d80887cdbecb..347ebeadb40a3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -16,8 +16,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index 6724384bdd227..9d636067c59aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -16,8 +16,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 5a7caf1713d73..83e4ca74e56fe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -27,8 +27,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -59,8 +59,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] @@ -117,8 +117,8 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -148,8 +148,8 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] @@ -226,8 +226,8 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -277,8 +277,8 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] @@ -345,8 +345,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -395,8 +395,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] @@ -465,8 +465,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -524,8 +524,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] @@ -583,8 +583,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -619,8 +619,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll index 29c3244570eca..061f9dc551e22 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll @@ -36,8 +36,8 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -89,8 +89,8 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 { ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.ph: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP4]], 16 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP4]], 4 +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -112,7 +112,7 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 { ; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index a766eb599875d..a5f5cbad0d949 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -28,8 +28,8 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP8]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP8]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 9b8748b4eb7de..f80693f15a4af 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -18,10 +18,10 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP40:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP40]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 @@ -84,10 +84,10 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP40:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP40]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index d92c0e3a9a3f9..ce23149c5cca7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -107,9 +107,9 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -128,7 +128,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP12]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -191,9 +191,9 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -214,7 +214,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr @CD_i16, [[TMP9]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], align 2 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -1113,8 +1113,8 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1125,7 +1125,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[P]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT6]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1191,8 +1191,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw [[TMP19]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1206,7 +1206,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], align 4 [[TMP17]], splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], align 4 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT6]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1274,9 +1274,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw [[TMP14]], splat (i64 1) -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP9]], 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1 @@ -1301,7 +1301,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index 9c3f3f74e4196..e2ac5225fe0fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -12,7 +12,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -59,7 +59,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 10a099e4f8ec0..a5ac999720e99 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -16,8 +16,8 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index 8c62ffd9c7a98..96fb60b893a66 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -10,7 +10,7 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 7) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 809da06a9a4ac..ad85147a03b42 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -28,8 +28,8 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP11]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] @@ -96,8 +96,8 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP11]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll index b373f0eb390ed..cd78aee4c7491 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll @@ -71,8 +71,8 @@ define void @always_taken(ptr noalias %p0, ptr noalias %p1, i1 %c0, i1 %c1) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i1 [[C1]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index da203b9d08a16..9ff193b36c008 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -43,8 +43,8 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP15]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP17]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP17]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 8108320fd54ab..1c4b8d61d167e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -47,7 +47,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 243ea7c3c4c43..b40aea57a3283 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -9,7 +9,7 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index ae7c9d263c179..ba8c1835af45b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -13,7 +13,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -46,7 +46,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -97,7 +97,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -129,7 +129,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: ; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -178,7 +178,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4 +; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] @@ -215,7 +215,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: ; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 +; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 5e94fbe95d4bf..019ea0b3e07f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -11,8 +11,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 4 -; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 2 +; CHECK-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -41,7 +41,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]] -; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP56:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] @@ -94,8 +94,8 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP3]] @@ -124,7 +124,7 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[INDEX6]] -; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP56:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 8cc9e431e6214..43a062abba8f2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -11,7 +11,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -103,7 +103,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -156,7 +156,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -166,9 +166,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul nsw [[TMP13]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP14]] -; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i64 4, [[TMP4]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -182,7 +182,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -214,7 +214,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -269,7 +269,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] @@ -323,7 +323,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] @@ -390,7 +390,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] @@ -441,7 +441,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -496,7 +496,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -549,7 +549,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll index a1b7fed936d69..360e7d6a23c19 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll @@ -10,7 +10,7 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[A]], align 4 @@ -61,8 +61,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] @@ -119,7 +119,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -179,8 +179,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -246,8 +246,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -319,8 +319,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP6]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP6]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -390,8 +390,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index 2c7521bddafb6..b7bdd9e64da9a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -13,7 +13,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF1-NEXT: br label [[VECTOR_PH1:%.*]] ; CHECK-UF1: vector.ph: ; CHECK-UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 16 +; CHECK-UF1-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-UF1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF1-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4 ; CHECK-UF1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP18]] @@ -42,8 +42,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: br label [[VECTOR_PH1:%.*]] ; CHECK-UF4: vector.ph: ; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 16 -; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-UF4-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 4 +; CHECK-UF4-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6 ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]] @@ -62,7 +62,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[TMP18]], [[VECTOR_PH1]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; CHECK-UF4-NEXT: [[TMP32:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP1]] ; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]] @@ -100,8 +100,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-TF: vector.ph: ; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP0]], 4 +; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5 ; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] @@ -162,7 +162,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF1-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-UF1: vector.ph: ; CHECK-UF1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP12]], 2 +; CHECK-UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP12]], 1 ; CHECK-UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF1-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-UF1-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[TMP9]] @@ -194,8 +194,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-UF4: vector.ph: ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-UF4-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3 ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]] @@ -214,7 +214,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-UF4-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP1]], 2 +; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw nsw i64 [[TMP1]], 3 ; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP1]] ; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]] @@ -255,8 +255,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-TF: vector.ph: ; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 2 -; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2 +; CHECK-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP0]], 1 +; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index eceda0897b174..a381218002fc2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -13,7 +13,7 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] ; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 456c03824106a..5c57491ef9290 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -23,7 +23,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 @@ -105,7 +105,7 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 3082a49babed4..dca0065eb854c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -240,8 +240,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP12]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: store [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP5]], 3 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -311,8 +311,8 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[VECTOR_GEP]], zeroinitializer ; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr align 2 [[TMP7]], [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index de70da6d2558b..7a3b8ccf3e241 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -19,7 +19,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NONE: vector.ph: ; NONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] ; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 @@ -54,7 +54,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: br label [[VECTOR_PH:%.*]] ; DATA: vector.ph: ; DATA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DATA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; DATA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; DATA-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 ; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] @@ -81,7 +81,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_PH:%.*]] ; DATA_NO_LANEMASK: vector.ph: ; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 ; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] @@ -116,7 +116,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: br label [[VECTOR_PH:%.*]] ; DATA_AND_CONTROL: vector.ph: ; DATA_AND_CONTROL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -142,7 +142,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll index d82dace2c9e04..57d0534872118 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll @@ -13,7 +13,7 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -62,7 +62,7 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -172,7 +172,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; ; CHECK-LABEL: define void @test_masked_interleave_group( ; CHECK-SAME: i32 [[N:%.*]], ptr [[MASK:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -202,7 +202,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP21]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP21]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -243,7 +243,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 8 +; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 3 ; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP1]], [[TMP23]] ; CHECK-NEXT: [[INDEX:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF10]] ; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[INDEX]] to i32 @@ -279,10 +279,10 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; CHECK-NEXT: [[CMP_N24:%.*]] = icmp eq i64 [[TMP1]], [[INDEX]] ; CHECK-NEXT: br i1 [[CMP_N24]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL25:%.*]] = phi ptr [ [[NEXT_GEP]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[DST]], %[[VECTOR_MEMCHECK]] ], [ [[DST]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi ptr [ [[NEXT_GEP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ], [ [[SRC]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL27:%.*]] = phi ptr [ [[NEXT_GEP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[MASK]], %[[VECTOR_MEMCHECK]] ], [ [[MASK]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL25:%.*]] = phi ptr [ [[NEXT_GEP]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[DST]], %[[VECTOR_MEMCHECK]] ], [ [[DST]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi ptr [ [[NEXT_GEP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ], [ [[SRC]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL27:%.*]] = phi ptr [ [[NEXT_GEP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[MASK]], %[[VECTOR_MEMCHECK]] ], [ [[MASK]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll index 23bc21a49a8b3..be0902180abc8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll @@ -554,7 +554,7 @@ define void @test_2xi64_mul_add_xor_mismatched_opcodes2(ptr noalias %data, ptr n ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) ; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] -; VF2-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 1) ; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] ; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index 0cfc14a0ae3a8..a44a16455445c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -29,7 +29,7 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -53,8 +53,8 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] @@ -109,7 +109,7 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -133,8 +133,8 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll index 97cc6929e44d5..83186fdc9b38a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll @@ -10,7 +10,7 @@ define i32 @gep_with_all_invariant_operands(ptr %src.0, ptr %src.1, i64 %n, i1 % ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index 82f272ad853a8..5e9f07e3611e9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -10,7 +10,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: br label [[VECTOR_PH:%.*]] ; WIDE: vector.ph: ; WIDE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; WIDE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; WIDE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; WIDE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -48,7 +48,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; NARROW-NEXT: br label [[VECTOR_PH:%.*]] ; NARROW: vector.ph: ; NARROW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NARROW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 4 +; NARROW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 2 ; NARROW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] ; NARROW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; NARROW-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index 9f62c7dcda65a..d0dfc8e6fbd2f 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -124,7 +124,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 @@ -233,7 +233,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> align 4 [[TMP3]], <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) @@ -454,7 +454,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> splat (i32 3), [[TMP3]] -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[X]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[X]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 9daf4236982bd..61916e7f3be1d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -87,7 +87,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 252, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 4, [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = sext [[VEC_IND]] to @@ -360,9 +360,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 2, [[TMP16]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP16]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP10]], 2 @@ -375,7 +375,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP18]], align 4 [[TMP19]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 69d83db49fd18..275386f3737fe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -62,7 +62,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 8) +; CHECK-NEXT: [[TMP6:%.*]] = shl [[TMP5]], splat (i64 3) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] @@ -70,8 +70,8 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP9]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index e35db479dc963..753b75970882d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -11,7 +11,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll index 9f747dd6222a2..bd52d31c2b1c9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll @@ -28,7 +28,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -89,7 +89,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: ; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]] ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]] @@ -173,7 +173,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -234,7 +234,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: ; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]] ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]] @@ -318,7 +318,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -379,7 +379,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: ; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2 +; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 1 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]] ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]] @@ -463,7 +463,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -524,7 +524,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: ; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2 +; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 1 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]] ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]] ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 4ccec2ca61778..7a92f0ec9fd44 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -138,7 +138,7 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 { ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 2, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = or [[VEC_IND2]], [[VEC_IND]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 7e6e45feaa834..c520f667c2eeb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -20,7 +20,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; OUTLOOP: vector.ph: ; OUTLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4 +; OUTLOOP-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -70,7 +70,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INLOOP: vector.ph: ; INLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 8 +; INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 3 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -199,7 +199,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; OUTLOOP: vector.ph: ; OUTLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; OUTLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[START:%.*]], i64 0 @@ -245,7 +245,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INLOOP: vector.ph: ; INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -301,7 +301,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] @@ -326,7 +326,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 31c8b74194062..7ff7e6deaf503 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -141,7 +141,7 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -197,7 +197,7 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -255,7 +255,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -317,7 +317,7 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -381,7 +381,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -443,7 +443,7 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -489,7 +489,7 @@ define void @load_store_factor4(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4 ; CHECK-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.vp.load.nxv8i64.p0(ptr align 8 [[TMP9]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) @@ -509,7 +509,7 @@ define void @load_store_factor4(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -522,7 +522,7 @@ define void @load_store_factor4(ptr %p) { ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 4 +; FIXED-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 2 ; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP1]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> @@ -555,7 +555,7 @@ define void @load_store_factor4(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 4 +; SCALABLE-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 ; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4 ; SCALABLE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.vp.load.nxv8i64.p0(ptr align 8 [[TMP9]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) @@ -575,7 +575,7 @@ define void @load_store_factor4(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -649,7 +649,7 @@ define void @load_store_factor5(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -721,7 +721,7 @@ define void @load_store_factor5(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -803,7 +803,7 @@ define void @load_store_factor6(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -880,7 +880,7 @@ define void @load_store_factor6(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; SCALABLE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -970,7 +970,7 @@ define void @load_store_factor7(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]] ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -1053,7 +1053,7 @@ define void @load_store_factor7(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]] ; SCALABLE-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -1151,7 +1151,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -1237,7 +1237,7 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]] ; SCALABLE-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -1327,7 +1327,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -1379,7 +1379,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: @@ -1434,7 +1434,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -1486,7 +1486,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: br label [[LOOP:%.*]] ; SCALABLE: exit: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index f49f9284a1e93..ca44fbba98074 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -54,9 +54,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; RV32-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]] +; RV32-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP8]], 4 ; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; RV32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] ; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i32.nxv2p0( align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; RV32-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], splat (i32 100) @@ -68,7 +68,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; RV32: middle.block: @@ -123,9 +123,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; RV64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 -; RV64-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]] -; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; RV64-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP8]], 4 +; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; RV64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] ; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i32.nxv2p0( align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]] ; RV64-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], splat (i32 100) @@ -137,7 +137,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index 1ae1ba6795c01..d41f5e4d92e58 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -19,7 +19,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; V: vector.ph: ; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; V-NEXT: br label [[VECTOR_BODY:%.*]] @@ -52,7 +52,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; ZVQDOTQ: vector.ph: ; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] @@ -212,7 +212,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; V: vector.ph: ; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; V-NEXT: br label [[VECTOR_BODY:%.*]] @@ -245,7 +245,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; ZVQDOTQ: vector.ph: ; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] @@ -405,7 +405,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; V: vector.ph: ; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; V-NEXT: br label [[VECTOR_BODY:%.*]] @@ -438,7 +438,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; ZVQDOTQ: vector.ph: ; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] @@ -597,7 +597,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; V: vector.ph: ; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; V-NEXT: br label [[VECTOR_BODY:%.*]] @@ -630,7 +630,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; ZVQDOTQ: vector.ph: ; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll index 850a6cb7ddb0d..6d547cf1397ba 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll @@ -20,20 +20,20 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT8]], poison, zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) @@ -51,8 +51,8 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] -; CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT9]] +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND3]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -77,20 +77,20 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]: ; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] ; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] -; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 2 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]] -; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 -; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]] -; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT8]], poison, zeroinitializer ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] ; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) @@ -108,8 +108,8 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) ; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] ; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] -; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT9]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND3]], [[BROADCAST_SPLAT8]] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 84698e998439f..e47f3167e58cc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -105,8 +105,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; RV64-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 +; RV64-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] @@ -339,8 +339,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4 -; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; RV64-UF2-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 2 +; RV64-UF2-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]] ; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]] ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] @@ -590,8 +590,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4 -; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2 +; RV64-UF2-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 2 +; RV64-UF2-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]] ; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]] ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] @@ -768,8 +768,8 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; RV64-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 +; RV64-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index d156d0c501e1f..ad0ac00374aca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -21,7 +21,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP14]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) @@ -43,10 +43,10 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UF2: vector.ph: ; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]] @@ -59,8 +59,8 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul nuw nsw [[VEC_IND]], splat (i64 8) -; CHECK-UF2-NEXT: [[TMP10:%.*]] = mul nuw nsw [[STEP_ADD]], splat (i64 8) +; CHECK-UF2-NEXT: [[TMP9:%.*]] = shl nuw nsw [[VEC_IND]], splat (i64 3) +; CHECK-UF2-NEXT: [[TMP10:%.*]] = shl nuw nsw [[STEP_ADD]], splat (i64 3) ; CHECK-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP9]] ; CHECK-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], [[TMP10]] ; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP11]], splat (i1 true), poison) @@ -123,15 +123,15 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 64, [[TMP11]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP11]], 6 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP13]], align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -147,14 +147,14 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UF2: vector.ph: ; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64 -; CHECK-UF2-NEXT: [[TMP6:%.*]] = mul [[BROADCAST_SPLAT]], splat (i64 64) +; CHECK-UF2-NEXT: [[TMP6:%.*]] = shl [[BROADCAST_SPLAT]], splat (i64 6) ; CHECK-UF2-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP7]], splat (i64 64) ; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]] @@ -226,7 +226,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul [[TMP14]], splat (i64 8) +; CHECK-NEXT: [[TMP16:%.*]] = shl [[TMP14]], splat (i64 3) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP11]]) @@ -234,8 +234,8 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP20]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP9]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP9]], 3 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -251,23 +251,23 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UF2: vector.ph: ; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]] ; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP6]] ; CHECK-UF2-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-UF2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul [[BROADCAST_SPLAT]], splat (i64 8) +; CHECK-UF2-NEXT: [[TMP9:%.*]] = shl [[BROADCAST_SPLAT]], splat (i64 3) ; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF2: vector.body: ; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-UF2-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i64 8) +; CHECK-UF2-NEXT: [[TMP11:%.*]] = shl [[TMP10]], splat (i64 3) ; CHECK-UF2-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP11]] ; CHECK-UF2-NEXT: [[TMP12:%.*]] = extractelement [[VECTOR_GEP]], i32 0 ; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, [[VECTOR_GEP]], [[TMP9]] @@ -283,7 +283,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], align 4 [[VECTOR_GEP]], splat (i1 true)) ; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], align 4 [[STEP_ADD]], splat (i1 true)) ; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-UF2-NEXT: [[TMP18:%.*]] = mul i64 8, [[TMP4]] +; CHECK-UF2-NEXT: [[TMP18:%.*]] = shl i64 [[TMP4]], 3 ; CHECK-UF2-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP18]] ; CHECK-UF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -375,8 +375,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -497,8 +497,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -679,8 +679,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-UF2-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP6]], 4 -; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP9]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP9]], 1 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -868,10 +868,10 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED-UF2: vector.ph: ; STRIDED-UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-UF2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4 +; STRIDED-UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP29]], i64 0 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; STRIDED-UF2-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 2 +; STRIDED-UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 1 ; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP30]] ; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 @@ -991,8 +991,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1195,10 +1195,10 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED-UF2: vector.ph: ; STRIDED-UF2-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-UF2-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; STRIDED-UF2-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; STRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 +; STRIDED-UF2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]] ; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 @@ -1326,10 +1326,10 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() @@ -1408,10 +1408,10 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED-UF2: vector.ph: ; STRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; STRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; STRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 +; STRIDED-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 ; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll index 0bcd027588f7d..9d78a7e6ff79f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll @@ -72,7 +72,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -187,7 +187,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -302,7 +302,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -417,7 +417,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -532,7 +532,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -647,7 +647,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -762,7 +762,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -877,7 +877,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -992,7 +992,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1107,7 +1107,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1222,7 +1222,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1337,7 +1337,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1452,7 +1452,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1571,7 +1571,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1688,7 +1688,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1805,7 +1805,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1922,7 +1922,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -2092,7 +2092,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index f5ff512b94123..f670d2abbcacc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -87,7 +87,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -225,7 +225,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -363,7 +363,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -501,7 +501,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -626,7 +626,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -744,7 +744,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -866,7 +866,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -994,7 +994,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1118,7 +1118,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index 317bcde2f4670..bfacd6bd59f50 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -73,7 +73,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -190,7 +190,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -307,7 +307,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -424,7 +424,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -541,7 +541,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -658,7 +658,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -775,7 +775,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -892,7 +892,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1009,7 +1009,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1126,7 +1126,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1218,7 +1218,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index a44acd3d9e48d..02788541d5d52 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -83,7 +83,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: ; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 @@ -131,7 +131,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: ; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -253,7 +253,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: ; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 @@ -305,7 +305,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: ; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -452,7 +452,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: ; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 @@ -509,7 +509,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: ; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() @@ -655,7 +655,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: ; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 @@ -716,7 +716,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: ; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll index 8cd540c3888db..2e9795609bc00 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll @@ -45,7 +45,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -141,7 +141,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -236,7 +236,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -331,7 +331,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index a0b95db2f3552..108ca55770b06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -16,7 +16,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 @@ -54,7 +54,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() @@ -125,7 +125,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 @@ -169,7 +169,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() @@ -253,7 +253,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 @@ -304,7 +304,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() @@ -404,7 +404,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; IF-EVL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 @@ -454,7 +454,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() @@ -530,7 +530,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2 +; IF-EVL-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP18]], 1 ; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 ; IF-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) @@ -573,7 +573,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index cc1b2380bc532..77818580ccf50 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -42,7 +42,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -233,7 +233,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -317,7 +317,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -401,7 +401,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -485,7 +485,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -571,7 +571,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -657,7 +657,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -743,7 +743,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -829,7 +829,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1021,7 +1021,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1109,7 +1109,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1415,7 +1415,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1509,7 +1509,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1601,7 +1601,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index b5662b0bd8d3b..98e353707886f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -44,7 +44,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -157,7 +157,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] @@ -281,7 +281,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() @@ -397,7 +397,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] @@ -522,7 +522,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() @@ -639,7 +639,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; NO-VP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll index 9e89cde3bc24a..b063514370319 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll @@ -129,7 +129,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NO-VP-OUTLOOP: vector.ph: ; NO-VP-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 @@ -182,7 +182,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NO-VP-INLOOP: vector.ph: ; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll index 1aea6aaff66a3..ec131491c4bde 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll @@ -38,7 +38,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP1]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index e9dcf8f0f0395..c7950ee402a9d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -42,7 +42,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[INC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index dcb7bf484f4ae..e9f4ffda9b822 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -41,7 +41,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N1]], [[TMP3]] ; NO-VP-NEXT: [[N:%.*]] = sub i64 [[N1]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index d1a2303e35e68..89d04f6423c58 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -42,7 +42,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement zeroinitializer, i32 [[START:%.*]], i32 0 @@ -236,7 +236,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement zeroinitializer, i32 [[START:%.*]], i32 0 @@ -323,7 +323,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement splat (i32 -1), i32 [[START:%.*]], i32 0 @@ -410,7 +410,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement zeroinitializer, i32 [[START:%.*]], i32 0 @@ -499,7 +499,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[START:%.*]], i64 0 @@ -592,7 +592,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[START:%.*]], i64 0 @@ -685,7 +685,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[START:%.*]], i64 0 @@ -778,7 +778,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[START:%.*]], i64 0 @@ -869,7 +869,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement splat (float -0.000000e+00), float [[START:%.*]], i32 0 @@ -1065,7 +1065,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[START:%.*]], i64 0 @@ -1158,7 +1158,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[START:%.*]], i64 0 @@ -1467,7 +1467,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement splat (float -0.000000e+00), float [[START:%.*]], i32 0 @@ -1561,7 +1561,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1653,7 +1653,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 8a96a7f0b2d9d..b0371ff239365 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -55,7 +55,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] @@ -178,7 +178,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[ENTRY:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL1:%.*]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index e1c62fe2d043d..cd60817465689 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -43,7 +43,7 @@ define void @test(ptr %p) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; NO-VP-NEXT: br label [[LOOP:%.*]] @@ -111,7 +111,7 @@ define void @test_may_clobber1(ptr %p) { ; IF-EVL-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP4]], align 32 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: exit: @@ -221,7 +221,7 @@ define void @test_may_clobber3(ptr %p) { ; IF-EVL-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP4]], align 32 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: exit: @@ -285,7 +285,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: exit: @@ -299,7 +299,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; NO-VP-NEXT: br label [[LOOP:%.*]] @@ -372,7 +372,7 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: exit: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll index d4d7d398185a1..ea49a27b363db 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll @@ -42,7 +42,7 @@ define void @load_store_interleave_group(ptr noalias %data) { ; EPILOGUE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; EPILOGUE: [[VECTOR_PH]]: ; EPILOGUE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; EPILOGUE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; EPILOGUE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; EPILOGUE-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; EPILOGUE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -141,7 +141,7 @@ define void @load_store_interleave_group_i32(ptr noalias %data) { ; EPILOGUE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; EPILOGUE: [[VECTOR_PH]]: ; EPILOGUE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; EPILOGUE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; EPILOGUE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; EPILOGUE-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; EPILOGUE-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 232c354764e1a..a1b8cbbabeece 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -56,7 +56,7 @@ define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 0a64723b6ff9d..2a63d4b22886e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -42,7 +42,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll index cfb180594b0ec..5cc68753b7c1d 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll @@ -16,14 +16,10 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 0, 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 1, 4 -; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 2, 4 -; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 3, 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 12 ; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1 @@ -33,8 +29,7 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr getelementptr inbounds nuw (i8, ptr @src, i64 16), align 4 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index e0dd3768ec111..a6bbf8683c592 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -17,8 +17,8 @@ define i64 @test_foldable_live_in_via_scev() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], splat (i64 2) -; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], splat (i64 2) +; CHECK-NEXT: [[TMP0]] = shl <2 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP1]] = shl <2 x i64> [[VEC_PHI1]], splat (i64 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index a1b92e0658bd3..000064727f13e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -195,7 +195,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT14]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = mul <8 x i16> , [[DOTSPLAT15]] ; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP22]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[TMP15:%.*]] = shl i16 [[TMP0]], 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 04bff3c393f62..009d386f52c77 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -685,7 +685,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -755,7 +755,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -828,7 +828,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index dbd7019188d07..2d3ab3e8b5c43 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -345,10 +345,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 -; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[TMP1]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP2]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP3]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index f9b512700f608..c8ec3bd8bcf0e 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -49,7 +49,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT1]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[INDUCTION_IV]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index e80fa3edba0c7..39af9cf382c1d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -185,17 +185,17 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: CLONE ir<%for.x.next> = mul ir<%x>, ir<2> +; CHECK-NEXT: EMIT vp<[[SHL:%.+]]> = shl ir<%x>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, vp<[[SHL]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]> -; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next> +; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, vp<[[SHL]]> ; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev> ; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 @@ -208,7 +208,7 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next> +; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part vp<[[SHL]]> ; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]> ; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev> ; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]> @@ -269,14 +269,14 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, vp<%6> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%l>, ir<2> -; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next> +; CHECK-NEXT: EMIT vp<%6> = shl ir<%l>, ir<1> +; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, vp<%6> ; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev> ; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 @@ -289,7 +289,7 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next> +; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part vp<%6> ; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]> ; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev> ; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]> diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index f6dd8564c001b..44e77e0a01f45 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -829,8 +829,8 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP26]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP22:%.*]] = mul <2 x i32> [[TMP18]], splat (i32 2) +; CHECK-NEXT: [[TMP41:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP22:%.*]] = shl <2 x i32> [[TMP41]], splat (i32 1) ; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] @@ -1089,7 +1089,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] ; CHECK: [[PRED_LOAD_CONTINUE7]]: ; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP29]], %[[PRED_LOAD_CONTINUE5]] ], [ [[TMP32]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP34:%.*]] = mul <2 x i32> [[TMP33]], splat (i32 2) +; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i32> [[TMP33]], splat (i32 1) ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] ; CHECK: [[PRED_LOAD_IF8]]: diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index eab9df558f608..018578b15d4b9 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1727,7 +1727,7 @@ define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2) +; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[VEC_PHI]], splat (i32 1) ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 5cc228f382983..76dc7a471c066 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -39,7 +39,7 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <8 x i32> , [[DOTSPLAT3]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i32 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -128,7 +128,7 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> , [[DOTSPLAT3]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP8]] -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[J_012]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i32 [[J_012]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -221,7 +221,7 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) { ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[DOTSPLAT6]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP3]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -329,7 +329,7 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> , [[DOTSPLAT1]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i16 [[O_1]], 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -399,7 +399,7 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> , [[DOTSPLAT1]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = shl i16 [[TMP0]], 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index b6fb378a042fd..454a4116a6322 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -5902,7 +5902,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = shl nsw i32 [[STEP]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 8d3d0ff7a6406..84e4aebd9425a 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -478,10 +478,10 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP6]], align 2 ; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 2 @@ -549,10 +549,10 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) -; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll index 6746e92cc1fd1..a76e337bdea59 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll @@ -13,7 +13,7 @@ define void @widengep_narrow(ptr %in, ptr noalias %p) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 1a1c05187590e..518d589c6c7cc 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -455,7 +455,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl i8 [[INDUCTION_IV]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -484,7 +484,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> , [[DOTSPLAT11]] ; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[INDUCTION_IV]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -542,7 +542,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = shl i8 [[INDUCTION_IV]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -571,7 +571,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT11]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = shl i8 [[INDUCTION_IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index b224a5a86b83d..ecbf24b06e845 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -21,7 +21,7 @@ define void @foo() { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d96134e8adf1d..995c2016339a4 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -243,7 +243,7 @@ define void @non_constant_vector_expansion(i32 %0, ptr %call) { ; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[OFFSET_IDX]] ; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4 +; STRIDED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 2 ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; STRIDED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index 623a9435edec1..81e873e7ef5fd 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -73,7 +73,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = shl nsw i32 [[STEP]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -194,7 +194,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP18]] -; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[STEP]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -387,7 +387,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP16]] -; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i32 [[CONV]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll index 25cc5b22764e1..5896fc04431d2 100644 --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -29,7 +29,7 @@ define void @test1_pr58811(ptr %dst) { ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] ; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP3]] -; VF2-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV]], 2 +; VF2-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV]], 1 ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -92,7 +92,7 @@ define void @test1_pr58811(ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -201,7 +201,7 @@ define void @test2_pr58811(ptr %dst) { ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] ; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP3]] -; VF2-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV_LCSSA]], 2 +; VF2-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV_LCSSA]], 1 ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -266,7 +266,7 @@ define void @test2_pr58811(ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV_LCSSA]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV_LCSSA]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -357,7 +357,7 @@ define void @test3_pr58811(ptr %dst) { ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] ; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP5]] -; VF2-NEXT: [[TMP6:%.*]] = mul i32 [[TMP3]], 2 +; VF2-NEXT: [[TMP6:%.*]] = shl i32 [[TMP3]], 1 ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -419,7 +419,7 @@ define void @test3_pr58811(ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP3]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index f254b19bba173..32d1c3f38b26a 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -11,8 +11,8 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -82,8 +82,8 @@ define void @test2(ptr %a, ptr noalias %b) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) @@ -148,10 +148,10 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index e892459fa33f5..e7869c167603f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -33,7 +33,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() @@ -96,8 +96,8 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP10]], 4 -; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP12]], 2 +; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP10]], 2 +; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 1 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() @@ -189,7 +189,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() @@ -248,8 +248,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP3]], 4 -; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP11]], 1 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() @@ -371,7 +371,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF1-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 +; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]] @@ -447,8 +447,8 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP11]], 4 -; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 2 +; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP11]], 2 +; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP13]], 1 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]] @@ -544,7 +544,7 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -570,7 +570,7 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -618,7 +618,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4 +; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP3]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -663,10 +663,10 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4 +; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 2 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2 +; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 1 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP6]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -745,7 +745,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF1-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() @@ -802,8 +802,8 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP7]], 2 +; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP5]], 2 +; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP7]], 1 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 62945d8eba6da..fe5cf2fa2c406 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -179,10 +179,10 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw [[TMP6]], splat (i32 1) -; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i32 [[TMP8]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i32 [[TMP9]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -190,7 +190,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index 64ec3ab2cf214..efc949ae53192 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -13,8 +13,8 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index 850d6b299ca23..c1a004e67c4d9 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -17,7 +17,7 @@ define void @test(ptr %d) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -85,7 +85,7 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index db2d13af38d63..2f7a001f760b1 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -14,7 +14,7 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) { ; CHECKUF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECKUF1: [[VECTOR_PH]]: ; CHECKUF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECKUF1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; CHECKUF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECKUF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECKUF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -42,8 +42,8 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) { ; CHECKUF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECKUF2: [[VECTOR_PH]]: ; CHECKUF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECKUF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP3]], 2 +; CHECKUF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2 +; CHECKUF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP3]], 1 ; CHECKUF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECKUF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECKUF2-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll index b63ab8fe8e84e..f5b445641097c 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll @@ -14,7 +14,7 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 256, [[TMP2]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index bdd7bb8f3df98..a1594e20efd21 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -12,8 +12,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP2]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP2]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll index ae96fe58caa26..1acd48dbf25d2 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -10,7 +10,7 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 @@ -68,7 +68,7 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len) ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[ARG1:%.*]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 163faa2254700..e40e8bb10a3f8 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -90,7 +90,7 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 2 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index f80d7b695e2af..c64a333b990ba 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -13,7 +13,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -55,7 +55,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 @@ -151,12 +151,12 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 @@ -206,7 +206,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -311,12 +311,12 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 @@ -366,12 +366,12 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 @@ -465,7 +465,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -507,12 +507,12 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1 @@ -608,12 +608,12 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 @@ -663,7 +663,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -768,12 +768,12 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 @@ -824,12 +824,12 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 7ff10c544f72a..1512cd4986e36 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -15,7 +15,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = urem <8 x i64> [[TMP0]], splat (i64 3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP1]], i32 1 @@ -91,7 +91,7 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[TMP0]], splat (i64 2) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1 @@ -166,30 +166,30 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i64> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP10]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP16]], align 8 ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> poison, i64 [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 1 @@ -239,7 +239,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 @@ -315,30 +315,30 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 5 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 6 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 7 -; CHECK-NEXT: [[TMP32:%.*]] = udiv <8 x i64> [[TMP31]], splat (i64 2) +; CHECK-NEXT: [[TMP32:%.*]] = lshr <8 x i64> [[TMP31]], splat (i64 1) ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP32]], i32 0 -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2 -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4 -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5 -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6 -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7 -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]] -; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP34]] -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]] -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP36]] -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]] -; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP38]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP40]] -; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP41]], align 4 -; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP42]], align 4 -; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP43]], align 4 -; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP44]], align 4 -; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP45]], align 4 -; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP46]], align 4 -; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP47]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP34]], align 4 +; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP36]], align 4 +; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP38]], align 4 +; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP40]], align 4 +; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP42]], align 4 +; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP44]], align 4 +; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP46]], align 4 ; CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP48]], align 4 ; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> poison, i32 [[TMP49]], i32 0 ; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index fb962c017156d..6ce34ad58c753 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -14,8 +14,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] -; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -48,8 +48,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] -; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 @@ -112,7 +112,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VF2-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[TMP0]] ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 @@ -139,8 +139,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] -; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 @@ -306,8 +306,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 @@ -349,8 +349,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 @@ -427,8 +427,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 @@ -470,8 +470,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 @@ -669,8 +669,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 @@ -711,8 +711,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 @@ -788,8 +788,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 @@ -830,8 +830,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 @@ -1024,8 +1024,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -1058,8 +1058,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 @@ -1123,8 +1123,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) ; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -1157,8 +1157,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 @@ -1324,8 +1324,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 @@ -1367,8 +1367,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 @@ -1445,8 +1445,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 @@ -1488,8 +1488,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 @@ -1687,8 +1687,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 @@ -1730,8 +1730,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 @@ -1808,8 +1808,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 @@ -1851,8 +1851,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll index 199bd5b31c70d..e30ebb59a8198 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -42,7 +42,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: ; NO-VP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP1]], 4 +; NO-VP-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP1]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index a2f5a9dcfae6c..910caf82ee759 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -1051,8 +1051,8 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> -; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul> sext to i64) +; CHECK-NEXT: EMIT vp<[[SHL:%.+]]> = shl ir<%l.ext>, ir<7> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (vp<[[SHL]]> sext to i64) ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors @@ -1060,7 +1060,7 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> +; CHECK-NEXT: EMIT vp<[[RES:%.+]]> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 9c4c9534157c7..2f063166b053c 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -28,10 +28,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%cond> = icmp eq ir<%iv>, ir<%x> ; CHECK-NEXT: EMIT vp<[[AND:%.+]]> = logical-and vp<[[MASK]]>, ir<%cond> -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: Successor(s): pred.load ; CHECK: pred.store: { ; CHECK-NEXT: pred.store.entry: @@ -39,13 +40,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr inbounds ir<@b>, ir<0>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]] -; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add> -; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]> +; CHECK-NEXT: REPLICATE store vp<[[SHL:%.+]]>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK: pred.store.continue: @@ -126,7 +122,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.0: -; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<2> +; CHECK-NEXT: EMIT vp<[[SHL:%.+]]> = shl ir<%iv>, ir<1> ; CHECK-NEXT: Successor(s): pred.store ; CHECK: pred.store: { @@ -135,7 +131,7 @@ exit: ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, ir<%mul> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[SHL]]> ; CHECK-NEXT: REPLICATE ir<%add> = add vp<[[PRED]]>, ir<10> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll index cd7ca1e22b499..6e8384420875f 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll @@ -21,7 +21,7 @@ define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]