-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[X86] Add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #156822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
a81c406
2fadf3f
f8362b4
ed4a09f
df6242e
9f2fb43
929d7c0
f91aa21
4f5fb87
2422cd4
a3575c5
b2cac3e
197123a
b733157
1ce4883
9a7c138
a65f4fc
9877317
404d261
1d61bf2
b25aa5e
4bc2341
242165a
d2e5d43
6d57df0
03e4db0
c2117f6
5c7412f
90200be
5df6aff
202c165
34ee8ed
9ec2672
7e15580
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2742,6 +2742,86 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC, | |
| return true; | ||
| } | ||
|
|
||
| static bool interp_builtin_horizontal_int_binop( | ||
| InterpState &S, CodePtr OpPC, const CallExpr *Call, | ||
| llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { | ||
| assert(Call->getNumArgs() == 2); | ||
|
|
||
| assert(Call->getArg(0)->getType()->isVectorType() && | ||
| Call->getArg(1)->getType()->isVectorType()); | ||
| const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); | ||
| assert(VT->getElementType()->isIntegralOrEnumerationType()); | ||
| PrimType ElemT = *S.getContext().classify(VT->getElementType()); | ||
| bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); | ||
|
|
||
| const Pointer &RHS = S.Stk.pop<Pointer>(); | ||
| const Pointer &LHS = S.Stk.pop<Pointer>(); | ||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||
|
|
||
| unsigned SourceLen = VT->getNumElements(); | ||
| assert(SourceLen % 2 == 0 && | ||
| Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == | ||
| SourceLen); | ||
|
||
| unsigned DstElem = 0; | ||
|
|
||
| for (unsigned I = 0; I != SourceLen; I += 2) { | ||
|
||
| INT_TYPE_SWITCH_NO_BOOL(ElemT, { | ||
| APSInt Elem1 = LHS.elem<T>(I).toAPSInt(); | ||
| APSInt Elem2 = LHS.elem<T>(I + 1).toAPSInt(); | ||
| Dst.elem<T>(DstElem) = | ||
| static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned)); | ||
| }); | ||
| ++DstElem; | ||
| } | ||
| for (unsigned I = 0; I != SourceLen; I += 2) { | ||
| INT_TYPE_SWITCH_NO_BOOL(ElemT, { | ||
| APSInt Elem1 = RHS.elem<T>(I).toAPSInt(); | ||
| APSInt Elem2 = RHS.elem<T>(I + 1).toAPSInt(); | ||
| Dst.elem<T>(DstElem) = | ||
| static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned)); | ||
| }); | ||
| ++DstElem; | ||
| } | ||
| Dst.initializeAllElements(); | ||
| return true; | ||
| } | ||
|
|
||
| static bool interp_builtin_horizontal_fp_binop( | ||
| InterpState &S, CodePtr OpPC, const CallExpr *Call, | ||
| llvm::function_ref<APFloat(const APFloat &, const APFloat &, | ||
| llvm::RoundingMode)> | ||
| Fn) { | ||
| assert(Call->getNumArgs() == 2); | ||
| assert(Call->getArg(0)->getType()->isVectorType() && | ||
| Call->getArg(1)->getType()->isVectorType()); | ||
| const Pointer &RHS = S.Stk.pop<Pointer>(); | ||
| const Pointer &LHS = S.Stk.pop<Pointer>(); | ||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||
|
|
||
| FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); | ||
| llvm::RoundingMode RM = getRoundingMode(FPO); | ||
| const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); | ||
| unsigned SourceLen = VT->getNumElements(); | ||
| assert(SourceLen % 2 == 0 && | ||
| Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == | ||
| SourceLen); | ||
| unsigned DstElem = 0; | ||
| for (unsigned I = 0; I != SourceLen; I += 2) { | ||
| using T = PrimConv<PT_Float>::T; | ||
| APFloat Elem1 = LHS.elem<T>(I).getAPFloat(); | ||
| APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat(); | ||
| Dst.elem<T>(DstElem++) = static_cast<T>(APFloat(Fn(Elem1, Elem2, RM))); | ||
|
||
| } | ||
| for (unsigned I = 0; I != SourceLen; I += 2) { | ||
| using T = PrimConv<PT_Float>::T; | ||
| APFloat Elem1 = RHS.elem<T>(I).getAPFloat(); | ||
| APFloat Elem2 = RHS.elem<T>(I + 1).getAPFloat(); | ||
| Dst.elem<T>(DstElem++) = static_cast<T>(APFloat(Fn(Elem1, Elem2, RM))); | ||
| } | ||
| Dst.initializeAllElements(); | ||
| return true; | ||
| } | ||
|
|
||
| static bool interp__builtin_elementwise_triop_fp( | ||
| InterpState &S, CodePtr OpPC, const CallExpr *Call, | ||
| llvm::function_ref<APFloat(const APFloat &, const APFloat &, | ||
|
|
@@ -3453,6 +3533,55 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | |
| case Builtin::BI__builtin_elementwise_min: | ||
| return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); | ||
|
|
||
| case clang::X86::BI__builtin_ia32_phaddw128: | ||
| case clang::X86::BI__builtin_ia32_phaddw256: | ||
| case clang::X86::BI__builtin_ia32_phaddd128: | ||
| case clang::X86::BI__builtin_ia32_phaddd256: | ||
| return interp_builtin_horizontal_int_binop( | ||
| S, OpPC, Call, | ||
| [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); | ||
| case clang::X86::BI__builtin_ia32_phaddsw128: | ||
| case clang::X86::BI__builtin_ia32_phaddsw256: | ||
| return interp_builtin_horizontal_int_binop( | ||
| S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { | ||
| return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS); | ||
whytolearn marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }); | ||
| case clang::X86::BI__builtin_ia32_phsubw128: | ||
| case clang::X86::BI__builtin_ia32_phsubw256: | ||
| case clang::X86::BI__builtin_ia32_phsubd128: | ||
| case clang::X86::BI__builtin_ia32_phsubd256: | ||
| return interp_builtin_horizontal_int_binop( | ||
| S, OpPC, Call, | ||
| [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; }); | ||
| case clang::X86::BI__builtin_ia32_phsubsw128: | ||
| case clang::X86::BI__builtin_ia32_phsubsw256: | ||
| return interp_builtin_horizontal_int_binop( | ||
| S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { | ||
| return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); | ||
whytolearn marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }); | ||
| case clang::X86::BI__builtin_ia32_haddpd: | ||
| case clang::X86::BI__builtin_ia32_haddpd256: | ||
| case clang::X86::BI__builtin_ia32_haddps: | ||
| case clang::X86::BI__builtin_ia32_haddps256: | ||
| return interp_builtin_horizontal_fp_binop( | ||
| S, OpPC, Call, | ||
| [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { | ||
| APFloat F = LHS; | ||
| F.add(RHS, RM); | ||
| return F; | ||
| }); | ||
| case clang::X86::BI__builtin_ia32_hsubpd: | ||
| case clang::X86::BI__builtin_ia32_hsubpd256: | ||
| case clang::X86::BI__builtin_ia32_hsubps: | ||
| case clang::X86::BI__builtin_ia32_hsubps256: | ||
| return interp_builtin_horizontal_fp_binop( | ||
| S, OpPC, Call, | ||
| [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { | ||
| APFloat F = LHS; | ||
| F.subtract(RHS, RM); | ||
| return F; | ||
| }); | ||
|
|
||
| case clang::X86::BI__builtin_ia32_pmuldq128: | ||
| case clang::X86::BI__builtin_ia32_pmuldq256: | ||
| case clang::X86::BI__builtin_ia32_pmuldq512: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,7 @@ | |
| #include "clang/Basic/TargetBuiltins.h" | ||
| #include "clang/Basic/TargetInfo.h" | ||
| #include "llvm/ADT/APFixedPoint.h" | ||
| #include "llvm/ADT/APInt.h" | ||
|
||
| #include "llvm/ADT/Sequence.h" | ||
| #include "llvm/ADT/SmallBitVector.h" | ||
| #include "llvm/ADT/StringExtras.h" | ||
|
|
@@ -12067,6 +12068,145 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { | |
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
|
|
||
| case clang::X86::BI__builtin_ia32_phaddw128: | ||
| case clang::X86::BI__builtin_ia32_phaddw256: | ||
| case clang::X86::BI__builtin_ia32_phaddd128: | ||
| case clang::X86::BI__builtin_ia32_phaddd256: | ||
| case clang::X86::BI__builtin_ia32_phaddsw128: | ||
| case clang::X86::BI__builtin_ia32_phaddsw256: | ||
|
|
||
| case clang::X86::BI__builtin_ia32_phsubw128: | ||
| case clang::X86::BI__builtin_ia32_phsubw256: | ||
| case clang::X86::BI__builtin_ia32_phsubd128: | ||
| case clang::X86::BI__builtin_ia32_phsubd256: | ||
| case clang::X86::BI__builtin_ia32_phsubsw128: | ||
| case clang::X86::BI__builtin_ia32_phsubsw256:{ | ||
| APValue SourceLHS, SourceRHS; | ||
| if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || | ||
| !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) | ||
| return false; | ||
| QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); | ||
| bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); | ||
|
|
||
| unsigned SourceLen = SourceLHS.getVectorLength(); | ||
| SmallVector<APValue, 4> ResultElements; | ||
| ResultElements.reserve(SourceLen); | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APSInt LHSA = SourceLHS.getVectorElt(EltNum).getInt(); | ||
| APSInt LHSB = SourceLHS.getVectorElt(EltNum + 1).getInt(); | ||
|
|
||
| switch (E->getBuiltinCallee()) { | ||
| case clang::X86::BI__builtin_ia32_phaddw128: | ||
| case clang::X86::BI__builtin_ia32_phaddw256: | ||
| case clang::X86::BI__builtin_ia32_phaddd128: | ||
| case clang::X86::BI__builtin_ia32_phaddd256: | ||
| ResultElements.push_back( | ||
| APValue(APSInt(LHSA+LHSB, DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phaddsw128: | ||
| case clang::X86::BI__builtin_ia32_phaddsw256: | ||
| ResultElements.push_back(APValue(APSInt( | ||
| LHSA.isSigned() ? LHSA.sadd_sat(LHSB) : LHSA.uadd_sat(LHSB), | ||
| DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phsubw128: | ||
| case clang::X86::BI__builtin_ia32_phsubw256: | ||
| case clang::X86::BI__builtin_ia32_phsubd128: | ||
| case clang::X86::BI__builtin_ia32_phsubd256: | ||
| ResultElements.push_back(APValue(APSInt(LHSA - LHSB, DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phsubsw128: | ||
| case clang::X86::BI__builtin_ia32_phsubsw256: | ||
| ResultElements.push_back(APValue(APSInt( | ||
| LHSA.isSigned() ? LHSA.ssub_sat(LHSB) : LHSA.usub_sat(LHSB), | ||
| DestUnsigned))); | ||
| break; | ||
| } | ||
| } | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APSInt RHSA = SourceRHS.getVectorElt(EltNum).getInt(); | ||
| APSInt RHSB = SourceRHS.getVectorElt(EltNum + 1).getInt(); | ||
|
|
||
| switch (E->getBuiltinCallee()) { | ||
| case clang::X86::BI__builtin_ia32_phaddw128: | ||
| case clang::X86::BI__builtin_ia32_phaddw256: | ||
| case clang::X86::BI__builtin_ia32_phaddd128: | ||
| case clang::X86::BI__builtin_ia32_phaddd256: | ||
| ResultElements.push_back(APValue(APSInt(RHSA + RHSB, DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phaddsw128: | ||
| case clang::X86::BI__builtin_ia32_phaddsw256: | ||
| ResultElements.push_back(APValue( | ||
| APSInt(RHSA.isSigned() ? RHSA.sadd_sat(RHSB) : RHSA.uadd_sat(RHSB), | ||
| DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phsubw128: | ||
| case clang::X86::BI__builtin_ia32_phsubw256: | ||
| case clang::X86::BI__builtin_ia32_phsubd128: | ||
| case clang::X86::BI__builtin_ia32_phsubd256: | ||
| ResultElements.push_back(APValue(APSInt(RHSA - RHSB, DestUnsigned))); | ||
| break; | ||
| case clang::X86::BI__builtin_ia32_phsubsw128: | ||
| case clang::X86::BI__builtin_ia32_phsubsw256: | ||
| ResultElements.push_back(APValue( | ||
| APSInt(RHSA.isSigned() ? RHSA.ssub_sat(RHSB) : RHSA.usub_sat(RHSB), | ||
| DestUnsigned))); | ||
| break; | ||
| } | ||
| } | ||
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
| case clang::X86::BI__builtin_ia32_haddpd: | ||
| case clang::X86::BI__builtin_ia32_haddpd256: | ||
| case clang::X86::BI__builtin_ia32_haddps: | ||
| case clang::X86::BI__builtin_ia32_haddps256: { | ||
| APValue SourceLHS, SourceRHS; | ||
| if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || | ||
| !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) | ||
| return false; | ||
| unsigned SourceLen = SourceLHS.getVectorLength(); | ||
| SmallVector<APValue, 4> ResultElements; | ||
| ResultElements.reserve(SourceLen); | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat(); | ||
| APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat(); | ||
| LHSA.add(LHSB, APFloat::rmNearestTiesToEven); | ||
| ResultElements.push_back(APValue(LHSA)); | ||
| } | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat(); | ||
| APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat(); | ||
| RHSA.add(RHSB, APFloat::rmNearestTiesToEven); | ||
| ResultElements.push_back(APValue(RHSA)); | ||
| } | ||
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
| case clang::X86::BI__builtin_ia32_hsubpd: | ||
| case clang::X86::BI__builtin_ia32_hsubpd256: | ||
| case clang::X86::BI__builtin_ia32_hsubps: | ||
| case clang::X86::BI__builtin_ia32_hsubps256: { | ||
| APValue SourceLHS, SourceRHS; | ||
| if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || | ||
| !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) | ||
| return false; | ||
| unsigned SourceLen = SourceLHS.getVectorLength(); | ||
| SmallVector<APValue, 4> ResultElements; | ||
| ResultElements.reserve(SourceLen); | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat(); | ||
| APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat(); | ||
| LHSA.subtract(LHSB, APFloat::rmNearestTiesToEven); | ||
| ResultElements.push_back(APValue(LHSA)); | ||
| } | ||
| for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { | ||
| APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat(); | ||
| APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat(); | ||
| RHSA.subtract(RHSB, APFloat::rmNearestTiesToEven); | ||
| ResultElements.push_back(APValue(RHSA)); | ||
| } | ||
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
|
|
||
| case Builtin::BI__builtin_elementwise_fshl: | ||
| case Builtin::BI__builtin_elementwise_fshr: { | ||
| APValue SourceHi, SourceLo, SourceShift; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.