Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
Expand All @@ -335,7 +333,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;

def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
}

Expand Down Expand Up @@ -470,8 +471,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
Expand All @@ -495,6 +494,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
}

let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3421,6 +3421,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return F;
});

case clang::X86::BI__builtin_ia32_blendvpd:
case clang::X86::BI__builtin_ia32_blendvpd256:
case clang::X86::BI__builtin_ia32_blendvps:
case clang::X86::BI__builtin_ia32_blendvps256:
return interp__builtin_elementwise_triop_fp(
S, OpPC, Call,
[](const APFloat &F, const APFloat &T, const APFloat &C,
llvm::RoundingMode) {
return C.bitcastToAPInt().isNegative() ? T : F;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the bitcast here really needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, its really just to minimize diffs with ExprConstant - I can change it

});

case clang::X86::BI__builtin_ia32_pblendvb128:
case clang::X86::BI__builtin_ia32_pblendvb256:
return interp__builtin_elementwise_triop(
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11995,6 +11995,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
case X86::BI__builtin_ia32_blendvps:
case X86::BI__builtin_ia32_blendvps256:
case X86::BI__builtin_ia32_pblendvb128:
case X86::BI__builtin_ia32_pblendvb256: {
// SSE blendv by mask signbit: "Result = C[] < 0 ? T[] : F[]".
Expand All @@ -12011,8 +12015,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
const APValue &F = SourceF.getVectorElt(EltNum);
const APValue &T = SourceT.getVectorElt(EltNum);
APInt C = SourceC.getVectorElt(EltNum).getInt();
ResultElements.push_back(C.isNegative() ? T : F);
const APValue &C = SourceC.getVectorElt(EltNum);
APInt M = C.isInt() ? (APInt)C.getInt() : C.getFloat().bitcastToAPInt();
ResultElements.push_back(M.isNegative() ? T : F);
}

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1402,9 +1402,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// 64-bit element in operand \a __b is copied to the same position in the
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
{
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) {
return (__m256d)__builtin_ia32_blendvpd256(
(__v4df)__a, (__v4df)__b, (__v4df)__c);
}
Expand All @@ -1430,9 +1429,8 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
/// corresponding 32-bit element in operand \a __b is copied to the same
/// position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
{
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) {
return (__m256)__builtin_ia32_blendvps256(
(__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
}
Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/smmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,8 @@
/// position in the result. When a mask bit is 1, the corresponding 64-bit
/// element in operand \a __V2 is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
__m128d __V2,
__m128d __M) {
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) {
return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2,
(__v2df)__M);
}
Expand All @@ -466,9 +465,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
/// position in the result. When a mask bit is 1, the corresponding 32-bit
/// element in operand \a __V2 is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
__m128 __V2,
__m128 __M) {
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) {
return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2,
(__v4sf)__M);
}
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,14 @@ __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_blendv_pd(V1, V2, V3);
}
TEST_CONSTEXPR(match_m256d(_mm256_blendv_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0},(__m256d)(__v4df){-100.0, -101.0, -102.0, -103.0},(__m256d)(__v4df){0.0, -1.0, 1.0, -1.0}), 1.0f, -101.0, 3.0, -103.0));

__m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) {
// CHECK-LABEL: test_mm256_blendv_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_blendv_ps(V1, V2, V3);
}
TEST_CONSTEXPR(match_m256(_mm256_blendv_ps((__m256)(__v8sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f},(__m256)(__v8sf){-100.0f, -101.0f, -102.0f, -103.0f, -104.0f, -105.0f, -106.0f, -107.0f},(__m256)(__v8sf){-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, -0.0f}), -100.0f, 1.0f, -102.0f, 3.0f, -104.0f, -105.0f, 6.0f, -107.0f));

__m256d test_mm256_broadcast_pd(__m128d* A) {
// CHECK-LABEL: test_mm256_broadcast_pd
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_blendv_pd(V1, V2, V3);
}
TEST_CONSTEXPR(match_m128d(_mm_blendv_pd((__m128d)(__v2df){2.0, -4.0},(__m128d)(__v2df){-111.0, +222.0},(__m128d)(__v2df){2.0, -2.0}), 2.0, 222.0));

__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
// CHECK-LABEL: test_mm_blendv_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_blendv_ps(V1, V2, V3);
}
TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f},(__m128)(__v4sf){-100.0f, -101.0f, -102.0f, -103.0f},(__m128)(__v4sf){-1.0f, 2.0f, -3.0f, 0.0f}), -100.0f, 1.0f, -102.0f, 3.0f));

__m128d test_mm_ceil_pd(__m128d x) {
// CHECK-LABEL: test_mm_ceil_pd
Expand Down