diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9543d97616ae3..743c4f574e131 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4016,6 +4016,32 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
+  case Intrinsic::experimental_get_vector_length: {
+    // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
+    // Compare in the wider of the count and result types so that the range
+    // of Cnt is never truncated before the comparison.
+    unsigned BitWidth =
+        std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
+                 II->getType()->getScalarSizeInBits());
+    ConstantRange Cnt =
+        computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
+                                               SQ.getWithInstruction(II))
+            .zextOrTrunc(BitWidth);
+    ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
+                                 ->getValue()
+                                 .zextOrTrunc(Cnt.getBitWidth());
+    // A set third operand means the lane count scales with vscale, so fold
+    // in the function's vscale range.
+    if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
+      MaxLanes = MaxLanes.multiply(
+          getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
+
+    // Only fold when every possible Cnt is ULE every possible MaxLanes.
+    if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
+      return replaceInstUsesWith(
+          *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
+    return nullptr;
+  }
   default: {
     // Handle target specific intrinsics
     std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/test/Transforms/InstCombine/get_vector_length.ll b/llvm/test/Transforms/InstCombine/get_vector_length.ll
new file mode 100644
index 0000000000000..122beeae866f3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/get_vector_length.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine,verify -S | FileCheck %s
+
+define i32 @cnt_known_lt() {
+; CHECK-LABEL: define i32 @cnt_known_lt() {
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 1, i32 2, i1 false)
+  ret i32 %x
+}
+
+define i32 @cnt_not_known_lt() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt() {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 false)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 false)
+  ret i32 %x
+}
+
+define i32 @cnt_known_lt_scalable() vscale_range(2, 4) {
+; CHECK-LABEL: define i32 @cnt_known_lt_scalable(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret i32 2
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+  ret i32 %x
+}
+
+define i32 @cnt_not_known_lt_scalable() {
+; CHECK-LABEL: define i32 @cnt_not_known_lt_scalable() {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 true)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
+  ret i32 %x
+}
+
+define i32 @cnt_known_lt_runtime(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[X]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %icmp = icmp ule i32 %x, 3
+  call void @llvm.assume(i1 %icmp)
+  %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 3, i1 false)
+  ret i32 %y
+}
+
+define i32 @cnt_known_lt_runtime_trunc(i64 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_trunc(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[X]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT:    [[Y:%.*]] = trunc nuw nsw i64 [[X]] to i32
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %icmp = icmp ule i64 %x, 3
+  call void @llvm.assume(i1 %icmp)
+  %y = call i32 @llvm.experimental.get.vector.length(i64 %x, i32 3, i1 false)
+  ret i32 %y
+}
+
+; FIXME: We should be able to deduce the constant range from AssumptionCache
+; rather than relying on KnownBits, which in this case only knows x <= 3.
+define i32 @cnt_known_lt_runtime_assumption(i32 %x) {
+; CHECK-LABEL: define i32 @cnt_known_lt_runtime_assumption(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[X]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[X]], i32 2, i1 false)
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %icmp = icmp ule i32 %x, 2
+  call void @llvm.assume(i1 %icmp)
+  %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 2, i1 false)
+  ret i32 %y
+}
+
+
+define i32 @cnt_known_lt_i16() {
+; CHECK-LABEL: define i32 @cnt_known_lt_i16() {
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i16 1, i32 2, i1 false)
+  ret i32 %x
+}