-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VectorCombine] Use isSafeToSpeculativelyExecute to guard VP scalarization #69494
[VectorCombine] Use isSafeToSpeculativelyExecute to guard VP scalarization #69494
Conversation
…ation Previously we were just matching against a fixed list of VP intrinsics that we knew couldn't be speculated, but we can reuse the logic in isSafeToSpeculativelyExecuteWithOpcode. This also allows speculation in more cases, e.g. when the divisor is known to be non-zero. Unfortunately we can't reuse the exact same function call for VP intrinsics with functional intrinsics instead of opcodes, because isSafeToSpeculativelyExecute needs an instruction that already exists. So this just copies the logic by peeking into the function attributes of the intrinsic.
@llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesPreviously we were just matching against a fixed list of VP intrinsics that we Unfortunately we can't reuse the exact same function call for VP intrinsics Full diff: https://github.com/llvm/llvm-project/pull/69494.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 64a515270fd57f2..4769f2c04473c99 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -825,23 +825,28 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
Value *EVL = VPI.getArgOperand(3);
const DataLayout &DL = VPI.getModule()->getDataLayout();
- bool MustHaveNonZeroVL =
- IntrID == Intrinsic::vp_sdiv || IntrID == Intrinsic::vp_udiv ||
- IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem;
-
- if (!MustHaveNonZeroVL || isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) {
- Value *ScalarOp0 = getSplatValue(Op0);
- Value *ScalarOp1 = getSplatValue(Op1);
- Value *ScalarVal =
- ScalarIntrID
- ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
- {ScalarOp0, ScalarOp1})
- : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
- ScalarOp0, ScalarOp1);
- replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
- return true;
- }
- return false;
+
+ bool SafeToSpeculate;
+ if (ScalarIntrID)
+ SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
+ .hasFnAttr(Attribute::AttrKind::Speculatable);
+ else
+ SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
+ *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
+ if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT))
+ return false;
+
+ Value *ScalarOp0 = getSplatValue(Op0);
+ Value *ScalarOp1 = getSplatValue(Op1);
+ Value *ScalarVal =
+ ScalarIntrID
+ ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
+ {ScalarOp0, ScalarOp1})
+ : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
+ ScalarOp0, ScalarOp1);
+
+ replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
+ return true;
}
/// Match a vector binop or compare instruction with at least one inserted
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
index da183a6b14bc687..e95aea4eb487b87 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
@@ -166,14 +166,23 @@ define <vscale x 1 x i64> @mul_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v
}
define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @sdiv_nxv1i64_allonesmask(
-; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]]
;
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -221,14 +230,23 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
}
define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @udiv_nxv1i64_allonesmask(
-; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]]
;
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -276,14 +294,23 @@ define <vscale x 1 x i64> @udiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
}
define <vscale x 1 x i64> @srem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @srem_nxv1i64_allonesmask(
-; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]]
;
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -331,14 +358,23 @@ define <vscale x 1 x i64> @srem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
}
define <vscale x 1 x i64> @urem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @urem_nxv1i64_allonesmask(
-; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]]
;
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
|
Should we be able to speculate functions like
|
I don't think we support scalarizing any of the *anymask test cases because we can't handle masks just yet: llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp Lines 755 to 767 in deb0c65
|
else | ||
SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode( | ||
*FunctionalOpcode, &VPI, nullptr, &AC, &DT); | ||
if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I realize that this is pre-existing code, but this isKnownNonZero check shouldn't be there at all. This is doubly incorrect, because it does not handle poison values correctly, and because it does not account for INT_MIN / -1 UB (unless that does not exist for the VP intrinsics, in which case it needs to be specified in LangRef).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless I'm missing something, this isKnownNonZero check isn't checking the divisor, it's checking the explicit vector length.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea being that we need to guard against introducing poison/UB that wasn't there before, e.g. if the EVL is zero and the divisor is zero, then there's actually no UB. So we can't scalarize it because then the scalarized version would have UB but the original wouldn't
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh yes, you are right. Ignore what I said.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, thanks for taking a look
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM on my end, but please address @nikic's comment. I didn't see it until after I approved.
Noticed whilst working on llvm#69494. VP intrinsics whose functional equivalent is an intrinsic were being marked as their lanes being non-speculatable, even if the underlying intrinsic was speculatable. This meant that %1 = call <4 x i32> @llvm.vp.umax(<4 x i32> %x, <4 x i32> %y, <4 x i1> %mask, i32 %evl) would be expanded out to %.splatinsert = insertelement <4 x i32> poison, i32 %evl, i64 0 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer %1 = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %.splat %2 = and <4 x i1> %1, %mask %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) instead of %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) The cause of this was isSafeToSpeculativelyExecuteWithOpcode checking the function attributes for the VP instruction itself, not the functional intrinsic. Since isSafeToSpeculativelyExecuteWithOpcode expects an already materialized instruction, we can't use it directly for the intrinsic case. So this fixes it by manually checking the function attributes on the intrinsic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
) Noticed whilst working on #69494. VP intrinsics whose functional equivalent is an intrinsic were being marked as their lanes being non-speculatable, even if the underlying intrinsic was speculatable. This meant that ```llvm %1 = call <4 x i32> @llvm.vp.umax(<4 x i32> %x, <4 x i32> %y, <4 x i1> %mask, i32 %evl) ``` would be expanded out to ```llvm %.splatinsert = insertelement <4 x i32> poison, i32 %evl, i64 0 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer %1 = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %.splat %2 = and <4 x i1> %1, %mask %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) ``` instead of ```llvm %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) ``` The cause of this was isSafeToSpeculativelyExecuteWithOpcode checking the function attributes for the VP instruction itself, not the functional intrinsic. Since isSafeToSpeculativelyExecuteWithOpcode expects an already materialized instruction, we can't use it directly for the intrinsic case. So this fixes it by manually checking the function attributes on the intrinsic.
…m#69504) Noticed whilst working on llvm#69494. VP intrinsics whose functional equivalent is an intrinsic were being marked as their lanes being non-speculatable, even if the underlying intrinsic was speculatable. This meant that ```llvm %1 = call <4 x i32> @llvm.vp.umax(<4 x i32> %x, <4 x i32> %y, <4 x i1> %mask, i32 %evl) ``` would be expanded out to ```llvm %.splatinsert = insertelement <4 x i32> poison, i32 %evl, i64 0 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer %1 = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %.splat %2 = and <4 x i1> %1, %mask %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) ``` instead of ```llvm %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) ``` The cause of this was isSafeToSpeculativelyExecuteWithOpcode checking the function attributes for the VP instruction itself, not the functional intrinsic. Since isSafeToSpeculativelyExecuteWithOpcode expects an already materialized instruction, we can't use it directly for the intrinsic case. So this fixes it by manually checking the function attributes on the intrinsic.
Previously we were just matching against a fixed list of VP intrinsics that we
knew couldn't be speculated, but we can reuse the logic in
isSafeToSpeculativelyExecuteWithOpcode. This also allows speculation in more
cases, e.g. when the divisor is known to be non-zero.
Unfortunately we can't reuse the exact same function call for VP intrinsics
with functional intrinsics instead of opcodes, because
isSafeToSpeculativelyExecute needs an instruction that already exists. So this
just copies the logic by peeking into the function attributes of the intrinsic.