diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp index 63bdc88ff4ba1..2acd037ab72d5 100644 --- a/sycl/include/sycl/detail/type_traits.hpp +++ b/sycl/include/sycl/detail/type_traits.hpp @@ -384,6 +384,22 @@ struct map_type { template constexpr bool check_type_in_v = ((std::is_same_v || ...)); +#if __has_builtin(__type_pack_element) +template +using nth_type_t = __type_pack_element; +#else +template struct nth_type { + using type = typename nth_type::type; +}; + +template struct nth_type<0, T, Ts...> { + using type = T; +}; + +template +using nth_type_t = typename nth_type::type; +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index d592c4a7e0b68..4781eb9c7a2cd 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -24,49 +24,143 @@ template class __SYCL_EBO vec; namespace detail { -template class VecAccess; +// We use std::plus and similar to "map" template parameter to an +// overloaded operator. These three below are missing from ``. +struct ShiftLeft { + template + constexpr auto operator()(T &&lhs, U &&rhs) const + -> decltype(std::forward(lhs) << std::forward(rhs)) { + return std::forward(lhs) << std::forward(rhs); + } +}; +struct ShiftRight { + template + constexpr auto operator()(T &&lhs, + U &&rhs) const -> decltype(std::forward(lhs) >> + std::forward(rhs)) { + return std::forward(lhs) >> std::forward(rhs); + } +}; -// Macros to populate binary operation on sycl::vec. -#if defined(__SYCL_BINOP) || defined(BINOP_BASE) -#error "Undefine __SYCL_BINOP and BINOP_BASE macro" -#endif +struct UnaryPlus { + template + constexpr auto operator()(T &&arg) const -> decltype(+std::forward(arg)) { + return +std::forward(arg); + } +}; +struct VecOperators { #ifdef __SYCL_DEVICE_ONLY__ -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ - vec_t Ret; \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - Ret = vec(ExtVecLhs BINOP ExtVecRhs); \ - if constexpr (std::is_same_v && CONVERT) { \ - vec_arith_common::ConvertToDataT(Ret); \ - } \ - } \ - return Ret; \ + static constexpr bool is_host = false; +#else + static constexpr bool is_host = true; +#endif + + template + static constexpr auto apply(const ArgTys &...Args) { + using Self = nth_type_t<0, ArgTys...>; + static_assert(is_vec_v); + static_assert(((std::is_same_v && ...))); + + using element_type = typename Self::element_type; + constexpr int N = Self::size(); + constexpr bool is_logical = check_type_in_v< + BinOp, std::equal_to, std::not_equal_to, std::less, + std::greater, std::less_equal, std::greater_equal, + std::logical_and, std::logical_or, std::logical_not>; + + using result_t = std::conditional_t< + is_logical, vec, N>, Self>; + + BinOp Op{}; + if constexpr (is_host || N == 1 || + std::is_same_v) { + result_t res{}; + for (size_t i = 0; i < N; ++i) + if constexpr (is_logical) + res[i] = Op(Args[i]...) ? -1 : 0; + else + res[i] = Op(Args[i]...); + return res; + } else { + using vector_t = typename Self::vector_t; + + auto res = [&](auto... xs) { + // Workaround for https://github.com/llvm/llvm-project/issues/119617. + if constexpr (sizeof...(Args) == 2) { + return [&](auto x, auto y) { + if constexpr (std::is_same_v>) + return x == y; + else if constexpr (std::is_same_v>) + return x != y; + else if constexpr (std::is_same_v>) + return x < y; + else if constexpr (std::is_same_v>) + return x <= y; + else if constexpr (std::is_same_v>) + return x > y; + else if constexpr (std::is_same_v>) + return x >= y; + else + return Op(x, y); + }(xs...); + } else { + return Op(xs...); + } + }(bit_cast(Args)...); + + if constexpr (std::is_same_v) { + // vec(vector_t) ctor does a simple bit_cast and the way "bool" is + // stored is that only one bit matters. vector_t, however, is a char + // type and it can have non-zero value with lowest bit unset. E.g., + // consider this: + // + // auto x = true + true; // int x = 2 + // bool y = true + true; // bool y = true + // + // and the vec has to behave in a similar way. As such, current + // implementation needs to do some extra processing for operators that + // can result in this scenario. + // + if constexpr (!is_logical && + !check_type_in_v, + std::divides, std::bit_or, + std::bit_and, std::bit_xor, + ShiftRight, UnaryPlus>) { + // TODO: Not sure why the following doesn't work + // (test-e2e/Basic/vector/bool.cpp fails). + // + // res = (decltype(res))(res != 0); + for (size_t i = 0; i < N; ++i) + res[i] = bit_cast(res[i]) != 0; + } + } + // The following is true: + // + // using char2 = char __attribute__((ext_vector_type(2))); + // using uchar2 = unsigned char __attribute__((ext_vector_type(2))); + // static_assert(std::is_same_v() == + // std::declval()), + // char2>); + // + // so we need some extra casts. Also, static_cast(char2{}) + // isn't allowed either. + return result_t{(typename result_t::vector_t)res}; + } } -#else // __SYCL_DEVICE_ONLY__ +}; + +// Macros to populate binary operation on sycl::vec. +#if defined(__SYCL_BINOP) +#error "Undefine __SYCL_BINOP macro" +#endif -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ +#define __SYCL_BINOP(BINOP, OPASSIGN, COND, FUNCTOR) \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ const vec_t & Rhs) { \ - vec_t Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - return Ret; \ - } -#endif // __SYCL_DEVICE_ONLY__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ - BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + return VecOperators::apply(Lhs, Rhs); \ + } \ \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ @@ -117,57 +211,17 @@ class vec_arith : public vec_arith_common { // operator!. friend vec operator!(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - if constexpr (!vec_t::IsBfloat16) { - auto extVec = sycl::bit_cast(Rhs); - vec Ret{ - (typename vec::vector_t) !extVec}; - return Ret; - } else -#endif // __SYCL_DEVICE_ONLY__ - { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - // static_cast will work here as the output of ! operator is either 0 or - // -1. - Ret[I] = static_cast(-1 * (!Rhs[I])); - } - return Ret; - } + return VecOperators::apply>(Rhs); } // operator +. friend vec_t operator+(const vec_t &Lhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Lhs); - return vec_t{+extVec}; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = +Lhs[I]; - return Ret; -#endif + return VecOperators::apply(Lhs); } // operator -. friend vec_t operator-(const vec_t &Lhs) { - vec_t Ret{}; - if constexpr (vec_t::IsBfloat16) { - for (size_t I = 0; I < NumElements; I++) - Ret[I] = -Lhs[I]; - } else { -#ifndef __SYCL_DEVICE_ONLY__ - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = -Lhs[I]; -#else - auto extVec = sycl::bit_cast(Lhs); - Ret = vec_t{-extVec}; - if constexpr (std::is_same_v) { - vec_arith_common::ConvertToDataT(Ret); - } -#endif - } - return Ret; + return VecOperators::apply>(Lhs); } // Unary operations on sycl::vec @@ -195,52 +249,16 @@ class vec_arith : public vec_arith_common { // logical operations should result in 0 and -1 (similar to OpenCL vectors). // That's why, for vec, we need to invert the result of the logical // operations since we store vec as scalar type on the device. -#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) -#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." +#if defined(__SYCL_RELLOGOP) +#error "Undefine __SYCL_RELLOGOP macro." #endif -#ifdef __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ +#define __SYCL_RELLOGOP(RELLOGOP, COND, FUNCTOR) \ template \ friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - /* ext_vector_type does not support bfloat16, so for these */ \ - /* we do element-by-element operation on the underlying std::array. */ \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - /* Cast required to convert unsigned char ext_vec_type to */ \ - /* char ext_vec_type. */ \ - Ret = vec( \ - (typename vec::vector_t)( \ - ExtVecLhs RELLOGOP ExtVecRhs)); \ - /* For NumElements == 1, we use scalar instead of ext_vector_type. */ \ - if constexpr (NumElements == 1) { \ - Ret *= -1; \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - return Ret; \ - } -#endif - -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - RELLOGOP_BASE(RELLOGOP, COND) \ + return VecOperators::apply(Lhs, Rhs); \ + } \ \ template \ friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ @@ -256,34 +274,34 @@ class vec_arith : public vec_arith_common { // OP is: ==, !=, <, >, <=, >=, &&, || // vec operatorOP(const vec &Rhs) const; // vec operatorOP(const DataT &Rhs) const; - __SYCL_RELLOGOP(==, true) - __SYCL_RELLOGOP(!=, true) - __SYCL_RELLOGOP(>, true) - __SYCL_RELLOGOP(<, true) - __SYCL_RELLOGOP(>=, true) - __SYCL_RELLOGOP(<=, true) + __SYCL_RELLOGOP(==, true, std::equal_to) + __SYCL_RELLOGOP(!=, true, std::not_equal_to) + __SYCL_RELLOGOP(>, true, std::greater) + __SYCL_RELLOGOP(<, true, std::less) + __SYCL_RELLOGOP(>=, true, std::greater_equal) + __SYCL_RELLOGOP(<=, true, std::less_equal) // Only available to integral types. - __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v)) - __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v)) + __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v), std::logical_and) + __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v), std::logical_or) #undef __SYCL_RELLOGOP #undef RELLOGOP_BASE // Binary operations on sycl::vec<> for all types except std::byte. - __SYCL_BINOP(+, +=, true, true) - __SYCL_BINOP(-, -=, true, true) - __SYCL_BINOP(*, *=, false, true) - __SYCL_BINOP(/, /=, false, true) + __SYCL_BINOP(+, +=, true, std::plus) + __SYCL_BINOP(-, -=, true, std::minus) + __SYCL_BINOP(*, *=, true, std::multiplies) + __SYCL_BINOP(/, /=, true, std::divides) // The following OPs are available only when: DataT != cl_float && // DataT != cl_double && DataT != cl_half && DataT != BF16. - __SYCL_BINOP(%, %=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(%, %=, (!detail::is_vgenfloat_v), std::modulus) // Bitwise operations are allowed for std::byte. - __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(>>, >>=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(<<, <<=, true, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(|, |=, (!detail::is_vgenfloat_v), std::bit_or) + __SYCL_BINOP(&, &=, (!detail::is_vgenfloat_v), std::bit_and) + __SYCL_BINOP(^, ^=, (!detail::is_vgenfloat_v), std::bit_xor) + __SYCL_BINOP(>>, >>=, (!detail::is_vgenfloat_v), ShiftRight) + __SYCL_BINOP(<<, <<=, (!detail::is_vgenfloat_v), ShiftLeft) // friends template friend class __SYCL_EBO vec; @@ -334,9 +352,9 @@ class vec_arith return Lhs; } - __SYCL_BINOP(|, |=, false, true) - __SYCL_BINOP(&, &=, false, true) - __SYCL_BINOP(^, ^=, false, true) + __SYCL_BINOP(|, |=, true, std::bit_or) + __SYCL_BINOP(&, &=, true, std::bit_and) + __SYCL_BINOP(^, ^=, true, std::bit_xor) // friends template friend class __SYCL_EBO vec; @@ -355,38 +373,14 @@ template class vec_arith_common { template friend std::enable_if_t, vec_t> operator~(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Rhs); - vec_t Ret{~extVec}; - if constexpr (std::is_same_v) { - ConvertToDataT(Ret); - } - return Ret; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = ~Rhs[I]; - } - return Ret; -#endif - } - -#ifdef __SYCL_DEVICE_ONLY__ - using vec_bool_t = vec; - // Required only for std::bool. - static void ConvertToDataT(vec_bool_t &Ret) { - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = bit_cast(Ret[I]) != 0; - } + return VecOperators::apply>(Rhs); } -#endif // friends template friend class __SYCL_EBO vec; }; #undef __SYCL_BINOP -#undef BINOP_BASE } // namespace detail } // namespace _V1 diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 5873d8acea56e..354975b857d13 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include // for IsRuntimePr... #include // for Sorted, Mer... #include // for property_value @@ -24,6 +25,7 @@ namespace ext::oneapi::experimental { template class __SYCL_EBO properties; namespace detail { +using namespace sycl::detail; // NOTE: Meta-function to implement CTAD rules isn't allowed to return // `properties` and it's impossible to return a pack as well. As diff --git a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp index 17609b55bcb95..42e4b315073f8 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp @@ -22,26 +22,6 @@ inline namespace _V1 { namespace ext::oneapi::experimental { namespace detail { -//****************************************************************************** -// Misc -//****************************************************************************** - -#if __has_builtin(__type_pack_element) -template -using nth_type_t = __type_pack_element; -#else -template struct nth_type { - using type = typename nth_type::type; -}; - -template struct nth_type<0, T, Ts...> { - using type = T; -}; - -template -using nth_type_t = typename nth_type::type; -#endif - //****************************************************************************** // Property value tooling //****************************************************************************** diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index cbdbcbc1a7cff..527a6bee05831 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -22,48 +22,52 @@ using namespace sycl; // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA11:![0-9]+]], !noalias [[META8]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA11]], !noalias [[META8]] -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META8]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META17]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META14:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META15]] -// CHECK-NEXT: [[LOADVEC4_I6_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META15]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I]], [[LOADVEC4_I6_I]] -// CHECK-NEXT: [[EXTRACTVEC_I8_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I8_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META25:![0-9]+]] +// CHECK-NEXT: [[LOADVEC4_I7_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META25]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I_I]], [[LOADVEC4_I7_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I9_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I9_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META25]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META19]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA11]], !noalias [[META19]] -// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META33:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META33]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META22:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA11]], !noalias [[META23]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA11]], !noalias [[META23]] -// CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META23]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META41:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META41]] +// CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META41]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -71,72 +75,81 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.15") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.22") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.22") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.22") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META27]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA11]], !noalias [[META27]] -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <4 x i8> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META27]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META49:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA14]], !noalias [[META49]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: -// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[RES_0_I_I:%.*]] = phi <4 x i8> [ [[ADD_I_I_I_I_I]], [[ENTRY:%.*]] ], [ [[VECINS_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I]] ] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLIBEENS0_3VECIBLI4EEERKS4_S6__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA11]], !alias.scope [[META27]] -// CHECK-NEXT: [[CMP3_I_I:%.*]] = icmp ne i8 [[TMP2]], 0 -// CHECK-NEXT: [[FROMBOOL_I_I:%.*]] = zext i1 [[CMP3_I_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META27]] +// CHECK-NEXT: [[VECEXT_I_I:%.*]] = extractelement <4 x i8> [[RES_0_I_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[CMP8_I_I:%.*]] = icmp ne i8 [[VECEXT_I_I]], 0 +// CHECK-NEXT: [[CONV9_I_I:%.*]] = zext i1 [[CMP8_I_I]] to i8 +// CHECK-NEXT: [[VECINS_I_I]] = insertelement <4 x i8> [[RES_0_I_I]], i8 [[CONV9_I_I]], i64 [[I_0_I_I]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplIbEENS0_3vecIbLi4EEERKS4_S6_.exit: +// CHECK-NEXT: store <4 x i8> [[RES_0_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META49]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_used_aspects [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.29") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_used_aspects [[META53:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META37]] -// CHECK-NEXT: [[LOADVEC4_I6_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META37]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I_I]], [[LOADVEC4_I6_I]] -// CHECK-NEXT: [[EXTRACTVEC_I8_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I8_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META61:![0-9]+]] +// CHECK-NEXT: [[LOADVEC4_I7_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META61]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I_I_I]], [[LOADVEC4_I7_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I9_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I9_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META61]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META40:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.36") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.36") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.36") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META62:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.36", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK-NEXT: [[REF_TMP_ASCAST_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I]] to ptr addrspace(4) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECIS5_LI3EEERKS7_S9__EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META44:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I10_I]]) #[[ATTR8]], !noalias [[META44]] -// CHECK-NEXT: [[ADD_I_I:%.*]] = fadd float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] -// CHECK-NEXT: store float [[ADD_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA47:![0-9]+]], !noalias [[META44]] -// CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR8]], !noalias [[META44]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I12_I]], align 2, !tbaa [[TBAA49:![0-9]+]], !alias.scope [[META41]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META63]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META69:![0-9]+]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECIS5_LI3EEERKS7_S9__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META70:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9:[0-9]+]], !noalias [[META73:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I12_I_I]]) #[[ATTR9]], !noalias [[META73]] +// CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] +// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA76:![0-9]+]], !noalias [[META73]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META73]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META70]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I_I14_I_I]], align 2, !tbaa [[TBAA78:![0-9]+]], !noalias [[META69]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP80:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENS0_3vecIS5_Li3EEERKS7_S9_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META69]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META63]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, @@ -147,14 +160,15 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.30") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.41") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.41") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.41") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META81:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA11]], !noalias [[META53]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA11]], !noalias [[META53]] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !alias.scope [[META53]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA14]], !noalias [[META88:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META88]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !tbaa [[TBAA14]], !alias.scope [[META88]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -162,7 +176,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META56:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.49") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.49") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] !srcloc [[META89:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -176,14 +190,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.54") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.59") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.59") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA11]], !noalias [[META58]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA11]], !noalias [[META58]] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <2 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i8> -// CHECK-NEXT: store <2 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META58]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA14]], !noalias [[META97:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META97]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ugt <2 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> +// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !tbaa [[TBAA14]], !alias.scope [[META97]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -191,14 +206,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.50") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.68") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META98:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META62]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA11]], !noalias [[META62]] -// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META62]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META105:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META105]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META105]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -206,29 +222,35 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META65:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.87") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.87") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META106:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.82", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) -// CHECK-NEXT: store i64 0, ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META66]] -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECISLI4EEERKNS6_IS5_LI4EEESA__EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I13_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META66]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I13_I]]) #[[ATTR8]], !noalias [[META66]] -// CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] -// CHECK-NEXT: [[CONV5_I:%.*]] = sext i1 [[CMP_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I15_I:%.*]] = getelementptr inbounds nuw [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV5_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I15_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META66]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP69:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META107]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META113:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECISLI4EEERKNS6_IS5_LI4EEESA__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META113]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I14_I_I]]) #[[ATTR9]], !noalias [[META113]] +// CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] +// CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 +// CHECK-NEXT: [[ARRAYIDX_I_I_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I_I16_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META113]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENS0_3vecIsLi4EEERKNS6_IS5_Li4EEESA_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META113]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META113]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META107]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, @@ -239,128 +261,143 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.69") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.69") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.91") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.91") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META115:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META71]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I]], <4 x i32> poison, <3 x i32> -// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I]], zeroinitializer -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <3 x i1> [[CMP_I]] to <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC_I2_I:%.*]] = shufflevector <3 x i32> [[SEXT_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META71]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META122:![0-9]+]] +// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I4_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I4_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META122]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META74:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.97") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.97") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META123:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META75]] -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META75]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META127:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META130:![0-9]+]] +// CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META130]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.78") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META78:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META131:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META79]] -// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META79]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META132:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META135:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META138:![0-9]+]] +// CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META138]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.83") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META82:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.22") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META139:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META83]] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META83]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META140:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META143:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META146:![0-9]+]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META146]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.88") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.93") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META86:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.114") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.119") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META147:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META87]] -// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer -// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META87]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META151:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META154:![0-9]+]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META154]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.55") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_used_aspects [[META35]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META155:![0-9]+]] !sycl_used_aspects [[META53]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META91]] -// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META91]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META156:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META159:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META162:![0-9]+]] +// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META162]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.98") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META94:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.126") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.36") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META163:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.126", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) -// CHECK-NEXT: store i64 0, ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META95]] -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META95]] -// CHECK-NEXT: [[TOBOOL_I:%.*]] = fcmp oeq float [[CALL_I_I_I]], 0.000000e+00 -// CHECK-NEXT: [[CONV3_I:%.*]] = sext i1 [[TOBOOL_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds nuw [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV3_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I10_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META95]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META164:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META167:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META164]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META170:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META170]] +// CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 +// CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 +// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I_I9_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META170]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP171:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META170]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META170]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META164]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META99:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.130") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.130") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META172:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.130", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 32 dereferenceable(32) [[AGG_RESULT]], i8 0, i64 32, i1 false), !alias.scope [[META100]] -// CHECK-NEXT: [[REF_TMP_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP_I]] to ptr addrspace(4) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META103:![0-9]+]] -// CHECK-NEXT: [[FNEG_I:%.*]] = fneg float [[CALL_I_I_I]] -// CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META103]] -// CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META103]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP105:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META173:![0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META176:![0-9]+]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META179:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META182:![0-9]+]] +// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I]] +// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA76]], !noalias [[META182]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META182]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META179]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I_I7_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META176]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP185:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META173]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; }