Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -276,21 +276,25 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}

let Features = "sse2",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;

def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;

def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
def psrlqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;

def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "sse3", Attributes = [NoThrow] in {
Expand Down Expand Up @@ -595,23 +599,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
Expand All @@ -628,6 +624,19 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;

def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def psllqi256
: X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;

def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def psrlqi256
: X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;

def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;

def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;

Expand Down Expand Up @@ -2062,7 +2071,6 @@ let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
Expand All @@ -2073,7 +2081,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}

let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
// Left shift of 16-bit lanes in a 512-bit vector by a scalar count.
// The 512-bit word shift (VPSLLW on ZMM) is an AVX512BW instruction, so this
// builtin must be gated on avx512bw rather than avx512f alone; otherwise a
// direct call with only AVX512F enabled passes feature checking even though
// the target has no such instruction.
let Features = "avx512bw,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
}

// Left shift of 32-bit and 64-bit lanes in a 512-bit vector by a scalar count.
let Features = "avx512f,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def pslldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
  def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
Expand All @@ -2090,7 +2100,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}

let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
// Logical right shift of 16-bit lanes in a 512-bit vector by a scalar count.
// The 512-bit word shift (VPSRLW on ZMM) is an AVX512BW instruction, so this
// builtin must be gated on avx512bw rather than avx512f alone; otherwise a
// direct call with only AVX512F enabled passes feature checking even though
// the target has no such instruction.
let Features = "avx512bw,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
}

// Logical right shift of 32-bit and 64-bit lanes in a 512-bit vector by a
// scalar count.
let Features = "avx512f,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
  def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
Expand All @@ -2116,10 +2128,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
}

let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psraw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def pslldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def psrldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
Expand Down Expand Up @@ -2435,7 +2447,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def scalefss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
}

let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
// Arithmetic right shift of 16-bit lanes in a 512-bit vector by a scalar
// count. The 512-bit word shift (VPSRAW on ZMM) is an AVX512BW instruction, so
// this builtin must be gated on avx512bw rather than avx512f alone; otherwise
// a direct call with only AVX512F enabled passes feature checking even though
// the target has no such instruction.
let Features = "avx512bw,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
}

// Arithmetic right shift of 32-bit and 64-bit lanes in a 512-bit vector by a
// scalar count.
let Features = "avx512f,evex512",
    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
  def psradi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
  def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
Expand All @@ -2448,11 +2462,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
// psraqi128: takes a vector of two 64-bit lanes plus a scalar int count and
// returns a vector of the same shape. Gated on avx512vl with a required vector
// width of 128. The Constexpr attribute goes together with the psraqi128
// entries in the arithmetic-right-shift case lists of the constant evaluators
// (ExprConstant.cpp and ByteCode/InterpBuiltin.cpp).
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
// psraqi256: takes a vector of four 64-bit lanes plus a scalar int count and
// returns a vector of the same shape. Gated on avx512vl with a required vector
// width of 256. The Constexpr attribute goes together with the psraqi256
// entries in the arithmetic-right-shift case lists of the constant evaluators
// (ExprConstant.cpp and ByteCode/InterpBuiltin.cpp).
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
}

Expand Down
69 changes: 55 additions & 14 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2565,10 +2565,34 @@ static bool interp__builtin_elementwise_int_binop(
return true;
}

const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
assert(VT->getElementType()->isIntegralOrEnumerationType());
PrimType ElemT = *S.getContext().classify(VT->getElementType());
unsigned NumElems = VT->getNumElements();
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

// Vector + Scalar case: the second call argument is not a vector, so one
// integer value is combined with every element of the first (vector) argument.
if (!Call->getArg(1)->getType()->isVectorType()) {
assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());

// The scalar is popped first, using the primitive type classified from
// argument 1; the vector operand is popped next. The destination is only
// peeked, so it remains on the stack after this function returns.
APSInt RHS = popToAPSInt(
S.Stk, *S.getContext().classify(Call->getArg(1)->getType()));
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

// Apply Fn to (element, scalar) for each lane. The result is rewrapped as an
// APSInt with the DestUnsigned signedness and then converted to the lane's
// primitive type T selected by ElemT.
for (unsigned I = 0; I != NumElems; ++I) {
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
Dst.elem<T>(I) = static_cast<T>(
APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
});
}
// Mark every destination element as initialized now that all lanes are
// written.
Dst.initializeAllElements();
return true;
}

// Vector case.
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
assert(VT->getElementType() ==
Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
assert(VT->getNumElements() ==
Expand All @@ -2578,22 +2602,12 @@ static bool interp__builtin_elementwise_int_binop(
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
PrimType ElemT = *S.getContext().classify(VT->getElementType());
unsigned NumElems = VT->getNumElements();
for (unsigned I = 0; I != NumElems; ++I) {
APSInt Elem1;
APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
Elem1 = LHS.elem<T>(I).toAPSInt();
Elem2 = RHS.elem<T>(I).toAPSInt();
APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
});

APSInt Result =
APSInt(Fn(Elem1, Elem2),
Call->getType()->isUnsignedIntegerOrEnumerationType());

INT_TYPE_SWITCH_NO_BOOL(ElemT,
{ Dst.elem<T>(I) = static_cast<T>(Result); });
}
Dst.initializeAllElements();

Expand Down Expand Up @@ -3254,6 +3268,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
case clang::X86::BI__builtin_ia32_psllv8si:
case clang::X86::BI__builtin_ia32_psllwi128:
case clang::X86::BI__builtin_ia32_psllwi256:
case clang::X86::BI__builtin_ia32_psllwi512:
case clang::X86::BI__builtin_ia32_pslldi128:
case clang::X86::BI__builtin_ia32_pslldi256:
case clang::X86::BI__builtin_ia32_pslldi512:
case clang::X86::BI__builtin_ia32_psllqi128:
case clang::X86::BI__builtin_ia32_psllqi256:
case clang::X86::BI__builtin_ia32_psllqi512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
if (RHS.uge(LHS.getBitWidth())) {
Expand All @@ -3264,6 +3287,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,

case clang::X86::BI__builtin_ia32_psrav4si:
case clang::X86::BI__builtin_ia32_psrav8si:
case clang::X86::BI__builtin_ia32_psrawi128:
case clang::X86::BI__builtin_ia32_psrawi256:
case clang::X86::BI__builtin_ia32_psrawi512:
case clang::X86::BI__builtin_ia32_psradi128:
case clang::X86::BI__builtin_ia32_psradi256:
case clang::X86::BI__builtin_ia32_psradi512:
case clang::X86::BI__builtin_ia32_psraqi128:
case clang::X86::BI__builtin_ia32_psraqi256:
case clang::X86::BI__builtin_ia32_psraqi512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
if (RHS.uge(LHS.getBitWidth())) {
Expand All @@ -3276,6 +3308,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_psrlv4di:
case clang::X86::BI__builtin_ia32_psrlv4si:
case clang::X86::BI__builtin_ia32_psrlv8si:
case clang::X86::BI__builtin_ia32_psrlwi128:
case clang::X86::BI__builtin_ia32_psrlwi256:
case clang::X86::BI__builtin_ia32_psrlwi512:
case clang::X86::BI__builtin_ia32_psrldi128:
case clang::X86::BI__builtin_ia32_psrldi256:
case clang::X86::BI__builtin_ia32_psrldi512:
case clang::X86::BI__builtin_ia32_psrlqi128:
case clang::X86::BI__builtin_ia32_psrlqi256:
case clang::X86::BI__builtin_ia32_psrlqi512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
if (RHS.uge(LHS.getBitWidth())) {
Expand Down
91 changes: 90 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11632,7 +11632,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_psrlv2di:
case clang::X86::BI__builtin_ia32_psrlv4di:
case clang::X86::BI__builtin_ia32_psrlv4si:
case clang::X86::BI__builtin_ia32_psrlv8si:{
case clang::X86::BI__builtin_ia32_psrlv8si:

case clang::X86::BI__builtin_ia32_psllwi128:
case clang::X86::BI__builtin_ia32_pslldi128:
case clang::X86::BI__builtin_ia32_psllqi128:
case clang::X86::BI__builtin_ia32_psllwi256:
case clang::X86::BI__builtin_ia32_pslldi256:
case clang::X86::BI__builtin_ia32_psllqi256:
case clang::X86::BI__builtin_ia32_psllwi512:
case clang::X86::BI__builtin_ia32_pslldi512:
case clang::X86::BI__builtin_ia32_psllqi512:

case clang::X86::BI__builtin_ia32_psrlwi128:
case clang::X86::BI__builtin_ia32_psrldi128:
case clang::X86::BI__builtin_ia32_psrlqi128:
case clang::X86::BI__builtin_ia32_psrlwi256:
case clang::X86::BI__builtin_ia32_psrldi256:
case clang::X86::BI__builtin_ia32_psrlqi256:
case clang::X86::BI__builtin_ia32_psrlwi512:
case clang::X86::BI__builtin_ia32_psrldi512:
case clang::X86::BI__builtin_ia32_psrlqi512:

case clang::X86::BI__builtin_ia32_psrawi128:
case clang::X86::BI__builtin_ia32_psradi128:
case clang::X86::BI__builtin_ia32_psraqi128:
case clang::X86::BI__builtin_ia32_psrawi256:
case clang::X86::BI__builtin_ia32_psradi256:
case clang::X86::BI__builtin_ia32_psraqi256:
case clang::X86::BI__builtin_ia32_psrawi512:
case clang::X86::BI__builtin_ia32_psradi512:
case clang::X86::BI__builtin_ia32_psraqi512: {

APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
Expand All @@ -11646,6 +11677,64 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();

// Scalar shift-count builtins (psll*i / psrl*i / psra*i): the second argument
// evaluated to a single integer rather than a vector, so the same count is
// applied to the current lane and the vector-RHS handling below is skipped.
if (SourceRHS.isInt()) {
const unsigned LaneBitWidth = LHS.getBitWidth();
// The count is read zero-extended, so a negative count becomes a large
// unsigned value and takes the out-of-range path of each case below.
// NOTE(review): ShiftAmount does not depend on EltNum; it could be computed
// once before the element loop.
const unsigned ShiftAmount = SourceRHS.getInt().getZExtValue();

switch (E->getBuiltinCallee()) {
// Left shift: a count of at least the lane width yields an all-zero lane.
case clang::X86::BI__builtin_ia32_psllwi128:
case clang::X86::BI__builtin_ia32_psllwi256:
case clang::X86::BI__builtin_ia32_psllwi512:
case clang::X86::BI__builtin_ia32_pslldi128:
case clang::X86::BI__builtin_ia32_pslldi256:
case clang::X86::BI__builtin_ia32_pslldi512:
case clang::X86::BI__builtin_ia32_psllqi128:
case clang::X86::BI__builtin_ia32_psllqi256:
case clang::X86::BI__builtin_ia32_psllqi512:
if (ShiftAmount >= LaneBitWidth) {
ResultElements.push_back(
APValue(APSInt(APInt::getZero(LaneBitWidth), DestUnsigned)));
} else {
ResultElements.push_back(
APValue(APSInt(LHS.shl(ShiftAmount), DestUnsigned)));
}
break;
// Logical right shift: a count of at least the lane width yields an all-zero
// lane.
case clang::X86::BI__builtin_ia32_psrlwi128:
case clang::X86::BI__builtin_ia32_psrlwi256:
case clang::X86::BI__builtin_ia32_psrlwi512:
case clang::X86::BI__builtin_ia32_psrldi128:
case clang::X86::BI__builtin_ia32_psrldi256:
case clang::X86::BI__builtin_ia32_psrldi512:
case clang::X86::BI__builtin_ia32_psrlqi128:
case clang::X86::BI__builtin_ia32_psrlqi256:
case clang::X86::BI__builtin_ia32_psrlqi512:
if (ShiftAmount >= LaneBitWidth) {
ResultElements.push_back(
APValue(APSInt(APInt::getZero(LaneBitWidth), DestUnsigned)));
} else {
ResultElements.push_back(
APValue(APSInt(LHS.lshr(ShiftAmount), DestUnsigned)));
}
break;
// Arithmetic right shift: the count is clamped to LaneBitWidth - 1, so an
// oversized count fills the lane with copies of its sign bit instead of
// zeroing it.
case clang::X86::BI__builtin_ia32_psrawi128:
case clang::X86::BI__builtin_ia32_psrawi256:
case clang::X86::BI__builtin_ia32_psrawi512:
case clang::X86::BI__builtin_ia32_psradi128:
case clang::X86::BI__builtin_ia32_psradi256:
case clang::X86::BI__builtin_ia32_psradi512:
case clang::X86::BI__builtin_ia32_psraqi128:
case clang::X86::BI__builtin_ia32_psraqi256:
case clang::X86::BI__builtin_ia32_psraqi512:
ResultElements.push_back(
APValue(APSInt(LHS.ashr(std::min(ShiftAmount, LaneBitWidth - 1)),
DestUnsigned)));
break;
default:
// Only the scalar-count shift builtins listed above are expected to reach
// this branch with an integer second operand.
llvm_unreachable("Unexpected builtin callee");
}
// This lane is done; move on to the next element without touching the
// vector-RHS code that follows.
continue;
}
APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
switch (E->getBuiltinCallee()) {
case Builtin::BI__builtin_elementwise_add_sat:
Expand Down
Loading