[Clang] Add constexpr support for AVX512 permutexvar intrinsics#167802
Merged
[Clang] Add constexpr support for AVX512 permutexvar intrinsics#167802
Conversation
Member
|
@llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) ChangesResolves: #167476 Patch is 55.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167802.diff 14 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cb08e2107f072..b261b681990e0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
@@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
- def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
@@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6c7b2f502cc51..c72a3566681b1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
}
});
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..e9e448143477e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128: {
APValue R;
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 46ec12a63ef9c..3201307af4731 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
(__v32hi)__B);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 8fb8cd7cd0865..9f5b726d7b789 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
(__v16hi)__B);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
}
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3cbaaece7b38e..3e3c13d8bd662 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) {
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
}
@@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..3cfa32eb9e727 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
(__v32hi) _mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..79c37173ac838 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__v8di)_mm512_permutex_epi64((X), (C)), \
(__v8di)_mm512_setzero_si512()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
+ __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__X, __Y),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__...
[truncated]
|
Member
|
@llvm/pr-subscribers-backend-x86 Author: NagaChaitanya Vellanki (chaitanyav) ChangesResolves: #167476 Patch is 55.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167802.diff 14 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cb08e2107f072..b261b681990e0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
@@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
- def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
@@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6c7b2f502cc51..c72a3566681b1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
}
});
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..e9e448143477e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128: {
APValue R;
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 46ec12a63ef9c..3201307af4731 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
(__v32hi)__B);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 8fb8cd7cd0865..9f5b726d7b789 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
(__v16hi)__B);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
}
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3cbaaece7b38e..3e3c13d8bd662 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) {
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
}
@@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..3cfa32eb9e727 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
(__v32hi) _mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..79c37173ac838 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__v8di)_mm512_permutex_epi64((X), (C)), \
(__v8di)_mm512_setzero_si512()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
+ __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__X, __Y),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__...
[truncated]
|
RKSimon
requested changes
Nov 13, 2025
6f32ecc to
ceaae3c
Compare
RKSimon
requested changes
Nov 13, 2025
Collaborator
RKSimon
left a comment
There was a problem hiding this comment.
sorry - you missed the avx2 tests
| _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) | ||
| { | ||
| static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR | ||
| _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { |
… AVX512 permutexvar intrinsics to be used in constexpr Resolves: llvm#167476
- Group permvarsi256/permvarsf256 with other AVX2 constexpr builtins - Remove unnecessary SrcIdx variable and use zero directly in pair construction
ceaae3c to
3502a04
Compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Resolves: #167476