From d120c79bfdf69bd6603d50fd47bd0857a1d96221 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Fri, 25 Oct 2024 02:31:10 -0700 Subject: [PATCH 1/4] [VPlan] Support VPReverseVectorPointer in DataWithEVL vectorization --- .../Transforms/Vectorize/VPlanTransforms.cpp | 8 ++++ .../Transforms/Vectorize/VPlanVerifier.cpp | 4 +- ...-force-tail-with-evl-reverse-load-store.ll | 44 +++++++++++++++++-- ...orize-force-tail-with-evl-uniform-store.ll | 5 ++- 4 files changed, 53 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c1b9d6ede5109..937a8f804bacd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1445,6 +1445,14 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VPTypeAnalysis TypeInfo(CanonicalIVType); LLVMContext &Ctx = CanonicalIVType->getContext(); SmallVector HeaderMasks = collectAllHeaderMasks(Plan); + + for (VPUser *U : Plan.getVF().users()) { + auto *CurRecipe = cast(U); + + if (auto *R = dyn_cast(CurRecipe)) + R->setOperand(1, &EVL); + } + for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) { for (VPUser *U : collectUsersRecursively(HeaderMask)) { auto *CurRecipe = cast(U); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 8bdb313324358..8169def00b4ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -143,8 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { .Case([&](const VPWidenStoreEVLRecipe *S) { return VerifyEVLUse(*S, 2); }) - .Case([&](const VPWidenLoadEVLRecipe *L) { - return VerifyEVLUse(*L, 1); + .Case( + [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) .Case([&](const VPWidenEVLRecipe *W) { return VerifyEVLUse( diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index cd36aee8ae530..dde114529cb73 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -49,9 +49,26 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP11]] +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]] +; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP16]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]] +; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP19]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -163,9 +180,28 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP17]] +; IF-EVL-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP18]] +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP19]] +; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 0, [[TMP23]] +; IF-EVL-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP23]] +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP24]] +; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP25]] +; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP27]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index d15a66fa86392..7efa65ed270ef 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -38,8 +38,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]] -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP15]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]] ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, splat (i1 true), i32 [[TMP11]]) From 83573f5c6ac4f01f2f0185630a9b9952a9a22874 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Mon, 18 Nov 2024 18:50:11 -0800 Subject: [PATCH 2/4] clang-formatted --- llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 8169def00b4ab..89f74540669e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -144,8 +144,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { return VerifyEVLUse(*S, 2); }) .Case( - [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); - }) + [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) .Case([&](const VPWidenEVLRecipe *W) { return VerifyEVLUse( *W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2); From 56c14627d536bb705f38c89eb3414ed7335ee114 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Fri, 22 Nov 2024 00:16:03 -0800 Subject: [PATCH 3/4] Address comment --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 937a8f804bacd..ad609da210fd1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1447,9 +1447,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { SmallVector HeaderMasks = collectAllHeaderMasks(Plan); for (VPUser *U : Plan.getVF().users()) { - auto *CurRecipe = cast(U); - - if (auto *R = dyn_cast(CurRecipe)) + if (auto *R = dyn_cast(U)) R->setOperand(1, &EVL); } From 0498645f91917229f2ca91a8aaecfe5257b52241 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Fri, 22 Nov 2024 00:27:25 -0800 Subject: [PATCH 4/4] update tests after rebase --- ...-force-tail-with-evl-reverse-load-store.ll | 56 ++++--------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index dde114529cb73..79471ab21dc78 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -34,36 +34,21 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP4]] -; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]] +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP18]] ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]] -; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]] +; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP19]] ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]] ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]] -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP10]] -; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP11]] -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]] -; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]] -; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP16]] -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]] -; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP19]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -163,40 +148,23 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], splat (i32 100) ; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], zeroinitializer ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] -; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP26]] +; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP26]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP27]] ; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] ; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP17]] -; IF-EVL-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP18]] -; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP19]] -; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 0, [[TMP23]] -; IF-EVL-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP23]] -; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP24]] -; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP25]] -; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP27]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]