diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bb4eef5a41c09..95580b38d7337 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7373,8 +7373,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( VPlanPatternMatch::m_Select(VPlanPatternMatch::m_VPValue(), VPlanPatternMatch::m_VPValue(BackedgeVal), VPlanPatternMatch::m_VPValue())); - EpiRedHeaderPhi = cast( + EpiRedHeaderPhi = cast_if_present( vputils::findRecipe(BackedgeVal, IsaPred)); + if (!EpiRedHeaderPhi) + return; } Value *MainResumeValue; @@ -9345,12 +9347,22 @@ static void fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L, // Fix induction resume values from the additional bypass block. IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt()); for (const auto &[IVPhi, II] : LVL.getInductionVars()) { - auto *Inc = cast(IVPhi->getIncomingValueForBlock(PH)); + Value *IncomingFromPH = IVPhi->getIncomingValueForBlock(PH); Value *V = createInductionAdditionalBypassValues( IVPhi, II, BypassBuilder, ExpandedSCEVs, MainVectorTripCount, LVL.getPrimaryInduction()); - // TODO: Directly add as extra operand to the VPResumePHI recipe. - Inc->setIncomingValueForBlock(BypassBlock, V); + + if (auto *Inc = dyn_cast(IncomingFromPH)) { + // TODO: Directly add as extra operand to the VPResumePHI recipe. + Inc->setIncomingValueForBlock(BypassBlock, V); + } else { + // If the incoming value from preheader is not a PHI node (e.g., a + // constant in functions with GC statepoint), directly add an incoming + // value from BypassBlock to IVPhi. The incoming value from PH remains + // unchanged (IncomingFromPH). + if (IVPhi->getBasicBlockIndex(BypassBlock) == -1) + IVPhi->addIncoming(V, BypassBlock); + } } } diff --git a/llvm/test/Transforms/LoopVectorize/pr179407.ll b/llvm/test/Transforms/LoopVectorize/pr179407.ll new file mode 100644 index 0000000000000..54b767d5ff3b4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr179407.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization -force-vector-width=2 -epilogue-vectorization-force-VF=2 -S < %s | FileCheck %s +; +; Test case for issue #179407: LoopVectorize should not crash during +; epilogue vectorization when the preheader has been simplified. + + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @wombat(i64 %arg) { +; CHECK-LABEL: define void @wombat( +; CHECK-SAME: i64 [[ARG:%.*]]) { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[BIN_RDX5:%.*]] = phi <2 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[BIN_RDX5]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI4]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX6:%.*]] = or <2 x i32> [[VEC_PHI4]], [[BIN_RDX5]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX6]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP4]], i32 0, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[BB5:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC8:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF7]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <2 x i32> [ [[TMP5]], %[[VEC_EPILOG_PH]] ], [ [[VEC_PHI10]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX9]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC8]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[VEC_PHI10]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]] +; CHECK-NEXT: [[RDX_SELECT12:%.*]] = select i1 [[TMP9]], i32 0, i32 0 +; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC8]] +; CHECK-NEXT: br i1 [[CMP_N13]], label %[[BB5]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[TMP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i32 [ [[RDX_SELECT12]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[SELECT]] = select i1 [[ICMP]], i32 0, i32 [[PHI3]] +; CHECK-NEXT: [[OR]] = or i32 [[PHI2]], 0 +; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP4:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP4]], label %[[BB1]], label %[[BB5]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[BB5]]: +; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SELECT]], %[[BB1]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT12]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[OR]], %[[BB1]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %phi = phi i64 [ 0, %bb ], [ %add, %bb1 ] + %phi2 = phi i32 [ 0, %bb ], [ %or, %bb1 ] + %phi3 = phi i32 [ 0, %bb ], [ %select, %bb1 ] + %icmp = icmp eq i32 0, 0 + %select = select i1 %icmp, i32 0, i32 %phi3 + %or = or i32 %phi2, 0 + %add = add i64 %phi, 1 + %icmp4 = icmp ult i64 %phi, %arg + br i1 %icmp4, label %bb1, label %bb5 + +bb5: ; preds = %bb1 + %phi6 = phi i32 [ %select, %bb1 ] + %phi7 = phi i32 [ %or, %bb1 ] + ret void +} + +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 2, i32 2} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;.