diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index a5e98bb7bc137..a6f4e51e258ab 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1847,6 +1847,10 @@ class TargetTransformInfo { /// otherwise scalar epilogue loop. LLVM_ABI bool preferEpilogueVectorization() const; + /// \returns True if the loop vectorizer should discard any VFs where the + /// maximum register pressure exceeds getNumberOfRegisters. + LLVM_ABI bool shouldConsiderVectorizationRegPressure() const; + /// \returns True if the target wants to expand the given reduction intrinsic /// into a shuffle sequence. LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index b58386b94bba4..566e1cf51631a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1105,6 +1105,8 @@ class TargetTransformInfoImplBase { virtual bool preferEpilogueVectorization() const { return true; } + virtual bool shouldConsiderVectorizationRegPressure() const { return false; } + virtual bool shouldExpandReduction(const IntrinsicInst *II) const { return true; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 899806bf37348..09b50c5270e57 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const { return TTIImpl->preferEpilogueVectorization(); } +bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const { + return TTIImpl->shouldConsiderVectorizationRegPressure(); +} + TargetTransformInfo::VPLegalization TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { return TTIImpl->getVPLegalizationStrategy(VPI); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 6bd7d51daff69..47e0a250d285a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -141,6 +141,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase { return false; } + bool shouldConsiderVectorizationRegPressure() const override { return true; } + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b4acda80cfb93..c04b5cb10eac2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -393,6 +393,10 @@ static cl::opt EnableEarlyExitVectorization( cl::desc( "Enable vectorization of early exit loops with uncountable exits.")); +static cl::opt ConsiderRegPressure( + "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, + cl::desc("Discard VFs if their register pressure is too high.")); + // Likelyhood of bypassing the vectorized loop because there are zero trips left // after prolog. See `emitIterationCountCheck`. static constexpr uint32_t MinItersBypassWeights[] = {1, 127}; @@ -3693,6 +3697,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF( ElementCount VF) { + if (ConsiderRegPressure.getNumOccurrences()) + return ConsiderRegPressure; + + // TODO: We should eventually consider register pressure for all targets. The + // TTI hook is temporary whilst target-specific issues are being fixed. + if (TTI.shouldConsiderVectorizationRegPressure()) + return true; + if (!useMaxBandwidth(VF.isScalable() ? TargetTransformInfo::RGK_ScalableVector : TargetTransformInfo::RGK_FixedWidthVector)) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll index 346f1cbcc7e3d..097f05d222cf6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll @@ -1,14 +1,11 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s - -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { ; CHECK-LABEL: add -; CHECK: LV(REG): Found max usage: 2 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK: LV(REG): VF = vscale x 4 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll index b25bc485a9ca7..8bbfdf39a0624 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll @@ -1,20 +1,19 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN - -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { -; CHECK-LABEL: add -; ZVFH: LV(REG): Found max usage: 2 item -; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; ZVFH-LABEL: add +; ZVFH: LV(REG): VF = vscale x 4 +; ZVFH-NEXT: LV(REG): Found max usage: 2 item +; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; ZVFHMIN: LV(REG): Found max usage: 2 item -; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; ZVFHMIN-LABEL: add +; ZVFHMIN: LV(REG): VF = vscale x 4 +; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item +; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll new file mode 100644 index 0000000000000..42f12ec2e4859 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=true -S < %s | FileCheck %s +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=false -S < %s | FileCheck %s --check-prefix=NO-REG-PRESSURE-CHECK + +define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { +; CHECK-LABEL: define void @f( +; CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP15:%.*]] = sub [[VEC_IND3]], splat (i64 1) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], [[TMP15]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP16]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP17:%.*]] = sub [[VEC_IND4]], splat (i64 1) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], [[TMP17]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP18]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0 +; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv12i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 +; CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] +; CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 +; CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 +; CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] +; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 +; CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 +; CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] +; CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 +; CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] +; CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 +; CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1 +; CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 +; CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 +; CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 +; CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 +; CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 +; CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 +; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +; NO-REG-PRESSURE-CHECK-LABEL: define void @f( +; NO-REG-PRESSURE-CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-REG-PRESSURE-CHECK-NEXT: [[ENTRY:.*:]] +; NO-REG-PRESSURE-CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-REG-PRESSURE-CHECK: [[VECTOR_PH]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] +; NO-REG-PRESSURE-CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP15:%.*]] = sub [[VEC_IND3]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], [[TMP15]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP16]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP17:%.*]] = sub [[VEC_IND4]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], [[TMP17]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP18]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv24i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) +; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] +; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]: +; NO-REG-PRESSURE-CHECK-NEXT: br label %[[EXIT:.*]] +; NO-REG-PRESSURE-CHECK: [[SCALAR_PH]]: +; NO-REG-PRESSURE-CHECK-NEXT: br label %[[LOOP:.*]] +; NO-REG-PRESSURE-CHECK: [[LOOP]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] +; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 +; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 +; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 +; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 +; NO-REG-PRESSURE-CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 +; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-REG-PRESSURE-CHECK: [[EXIT]]: +; NO-REG-PRESSURE-CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %wide.iv.0 = phi i64 [ 0, %entry ], [ %wide.iv.0.next, %loop ] + %wide.iv.1 = phi i64 [ 0, %entry ], [ %wide.iv.1.next, %loop ] + %wide.iv.2 = phi i64 [ 0, %entry ], [ %wide.iv.2.next, %loop ] + + %wide.iv.0.sub = sub i64 %wide.iv.0, 1 + %a.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.0.sub + %a = load i8, ptr %a.gep0 + + %wide.iv.1.sub = sub i64 %wide.iv.1, 1 + %b.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.1.sub + %b = load i8, ptr %b.gep0 + + %wide.iv.2.sub = sub i64 %wide.iv.2, 1 + %c.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.2.sub + %c = load i8, ptr %c.gep0 + + %iv.mul = mul i64 %iv, 3 + %base = getelementptr i8, ptr %p1, i64 %iv.mul + + %a.gep1 = getelementptr i8, ptr %base, i8 0 + store i8 %a, ptr %a.gep1 + + %b.gep1 = getelementptr i8, ptr %base, i8 1 + store i8 %b, ptr %b.gep1 + + %c.gep1 = getelementptr i8, ptr %base, i8 2 + store i8 %c, ptr %c.gep1 + + %iv.next = add i64 %iv, 1 + %wide.iv.0.next = add i64 %wide.iv.0, 2 + %wide.iv.1.next = add i64 %wide.iv.1, 3 + %wide.iv.2.next = add i64 %wide.iv.2, 4 + %done = icmp eq i64 %iv, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 116ccc9961795..99139da67bb78 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -5,50 +5,54 @@ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=1 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=1 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=2 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=2 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=4 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=4 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=8 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=8 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8 -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. - define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { -; CHECK-LABEL: add +; CHECK-SCALAR-LABEL: add ; CHECK-SCALAR: LV(REG): VF = 1 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 2 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers ; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL1: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL1-LABEL: add +; CHECK-LMUL1: LV(REG): VF = vscale x 2 +; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL2: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL2-LABEL: add +; CHECK-LMUL2: LV(REG): VF = vscale x 4 +; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL4: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL4-LABEL: add +; CHECK-LMUL4: LV(REG): VF = vscale x 8 +; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL8: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL8-LABEL: add +; CHECK-LMUL8: LV(REG): VF = vscale x 16 +; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers @@ -76,22 +80,26 @@ for.body: } define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) { -; CHECK-LABEL: goo +; CHECK-SCALAR-LABEL: goo ; CHECK-SCALAR: LV(REG): VF = 1 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers -; CHECK-LMUL1: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers -; CHECK-LMUL2: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers -; CHECK-LMUL4: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers -; CHECK-LMUL8: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers +; CHECK-LMUL1: LV(REG): VF = vscale x 2 +; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers +; CHECK-LMUL2: LV(REG): VF = vscale x 4 +; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-LMUL4: LV(REG): VF = vscale x 8 +; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers +; CHECK-LMUL8: LV(REG): VF = vscale x 16 +; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers entry: %cmp3 = icmp sgt i32 %n, 0 br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup