diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index 3af7a619724e6..2dd8dc803b32f 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -44,21 +44,18 @@ int main() { // CHECK-DAG: @.sycl_offloading.target.0 = internal unnamed_addr constant [7 x i8] c"spir64\00" // CHECK-DAG: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer // CHECK-DAG: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer -// CHECK-DAG: @prop = internal unnamed_addr constant [17 x i8] c"DeviceLibReqMask\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr null, i32 1, i64 0 }] -// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [24 x i8] c"SYCL/devicelib req mask\00" -// CHECK-DAG: @prop.1 = internal unnamed_addr constant [8 x i8] c"aspects\00" +// CHECK-DAG: @prop = internal unnamed_addr constant [8 x i8] c"aspects\00" // CHECK-DAG: @prop_val = internal unnamed_addr constant [8 x i8] zeroinitializer -// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop.1, ptr @prop_val, i32 2, i64 8 }] -// CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" -// CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr 
@SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] +// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr @prop_val, i32 2, i64 8 }] +// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" +// CHECK-DAG: @SYCL_PropSetName.1 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" +// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [2 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.1, ptr null, ptr null }] // CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" // CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @__sycl_offload_entry_name, i64 0, i64 0, ptr null }] // CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata" -// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr 
@.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] +// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([2 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 2) }] // CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null } // CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }] // CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }]
diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h
new file mode 100644
index 0000000000000..ac37441b4226a
--- /dev/null
+++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h
@@ -0,0 +1,27 @@
+//===- SYCLDeviceLibBF16.h - SYCL bfloat16 devicelib queries ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Queries that tell whether a module depends on the SYCL bfloat16 device
+// library functions (the __devicelib_Convert*BF16* family).
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace llvm {
+class Function;
+class Module;
+
+/// Returns true if \p M targets SPIR or SPIR-V and holds a declaration (per
+/// Function::isDeclaration()) of any SYCL bfloat16 device library function.
+bool isSYCLDeviceLibBF16Used(const Module &M);
+
+/// Returns true if \p F is a declaration (per Function::isDeclaration()) whose
+/// name exactly matches one of the SYCL bfloat16 device library functions.
+bool isBF16DeviceLibFuncDecl(const Function &F);
+} // namespace llvm
diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h deleted file mode 100644 index 1336238133984..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h +++ /dev/null @@ -1,47 +0,0 @@ -//===----- SYCLDeviceLibReqMask.h - get SYCL devicelib required Info -----=-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This function goes through input module's function list to detect all SYCL -// devicelib functions invoked. Each devicelib function invoked is included in -// one 'fallback' SPIR-V library loaded by SYCL runtime. After scanning all -// functions in input module, a mask telling which SPIR-V libraries are needed -// by input module indeed will be returned. This mask will be saved and used by -// SYCL runtime later. -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -namespace llvm { - -class Function; -class Module; - -// DeviceLibExt is shared between sycl-post-link tool and sycl runtime. -// If any change is made here, need to sync with DeviceLibExt definition -// in sycl/source/detail/program_manager/program_manager.hpp -// TODO: clear all these DeviceLibExt defs when begin to remove sycl -// devicelib online link path.
-enum class DeviceLibExt : std::uint32_t { - cl_intel_devicelib_assert, - cl_intel_devicelib_math, - cl_intel_devicelib_math_fp64, - cl_intel_devicelib_complex, - cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring, - cl_intel_devicelib_imf, - cl_intel_devicelib_imf_fp64, - cl_intel_devicelib_imf_bf16, - cl_intel_devicelib_bfloat16, -}; - -uint32_t getSYCLDeviceLibReqMask(const Module &M); -bool isSYCLDeviceLibBF16Used(const Module &M); -bool isBF16DeviceLibFuncDecl(const Function &F); -} // namespace llvm diff --git a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h index 43070dab50fbe..df95979c4d7ee 100644 --- a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h @@ -11,7 +11,6 @@ #pragma once #include "llvm/ADT/SetVector.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/Support/PropertySetIO.h" #include namespace llvm { diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 8338b894bd109..ac42af1f64a57 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -209,9 +209,6 @@ class PropertySetRegistry { "SYCL/specialization constants"; static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; - // TODO: remove SYCL_DEVICELIB_REQ_MASK when devicelib online linking path - // is totally removed. 
- static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; static constexpr char SYCL_DEVICELIB_METADATA[] = "SYCL/devicelib metadata"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 636ce7efb17d0..4320eedd69bda 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -48,7 +48,7 @@ add_llvm_component_library(LLVMSYCLLowerIR SYCLAddOptLevelAttribute.cpp SYCLConditionalCallOnDevice.cpp SYCLCreateNVVMAnnotations.cpp - SYCLDeviceLibReqMask.cpp + SYCLDeviceLibBF16.cpp SYCLDeviceRequirements.cpp SYCLKernelParamOptInfo.cpp SYCLJointMatrixTransform.cpp
diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp
new file mode 100644
index 0000000000000..4fa06ab0c4cc5
--- /dev/null
+++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp
@@ -0,0 +1,76 @@
+//==----- SYCLDeviceLibBF16.cpp - get SYCL BF16 devicelib required Info ----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file provides utilities to determine whether a device image depends on
+// SYCL bfloat16 device library functions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/TargetParser/Triple.h"
+
+// Prefix shared by every name in BF16DeviceLibFuncs; checking it first lets
+// isBF16DeviceLibFuncDecl reject unrelated functions without a table scan.
+static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_";
+
+using namespace llvm;
+
+// Names of the SYCL bfloat16 device library functions (ConvertFToBF16INTEL /
+// ConvertBF16ToFINTEL, scalar plus Vec1..Vec16 variants). A constexpr array of
+// string literals needs no dynamic initialization at load time.
+static constexpr const char *BF16DeviceLibFuncs[] = {
+    "__devicelib_ConvertFToBF16INTEL",
+    "__devicelib_ConvertBF16ToFINTEL",
+    "__devicelib_ConvertFToBF16INTELVec1",
+    "__devicelib_ConvertBF16ToFINTELVec1",
+    "__devicelib_ConvertFToBF16INTELVec2",
+    "__devicelib_ConvertBF16ToFINTELVec2",
+    "__devicelib_ConvertFToBF16INTELVec3",
+    "__devicelib_ConvertBF16ToFINTELVec3",
+    "__devicelib_ConvertFToBF16INTELVec4",
+    "__devicelib_ConvertBF16ToFINTELVec4",
+    "__devicelib_ConvertFToBF16INTELVec8",
+    "__devicelib_ConvertBF16ToFINTELVec8",
+    "__devicelib_ConvertFToBF16INTELVec16",
+    "__devicelib_ConvertBF16ToFINTELVec16",
+};
+
+/// Returns true if \p M targets SPIR or SPIR-V and holds a declaration (per
+/// Function::isDeclaration()) of any bfloat16 device library function.
+bool llvm::isSYCLDeviceLibBF16Used(const Module &M) {
+  if (!Triple(M.getTargetTriple()).isSPIROrSPIRV())
+    return false;
+
+  for (const char *Name : BF16DeviceLibFuncs) {
+    // A function that has a body in M does not count; only declarations do.
+    const Function *Fn = M.getFunction(Name);
+    if (Fn && Fn->isDeclaration())
+      return true;
+  }
+
+  return false;
+}
+
+/// Returns true if \p F is a declaration whose name exactly matches one of the
+/// bfloat16 device library function names.
+bool llvm::isBF16DeviceLibFuncDecl(const Function &F) {
+  if (!F.isDeclaration())
+    return false;
+
+  const StringRef Name = F.getName();
+  if (!Name.starts_with(DEVICELIB_FUNC_PREFIX))
+    return false;
+
+  for (const char *BF16Name : BF16DeviceLibFuncs) {
+    if (Name == BF16Name)
+      return true;
+  }
+
+  return false;
+}
diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp deleted file mode 100644 index c69364445c361..0000000000000 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ /dev/null @@ -1,795 +0,0 @@ -//==----- SYCLDeviceLibReqMask.cpp - get SYCL devicelib required Info ------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This function goes through input module's function list to detect all SYCL -// devicelib functions invoked. Each devicelib function invoked is included in -// one 'fallback' SPIR-V library loaded by SYCL runtime. After scanning all -// functions in input module, a mask telling which SPIR-V libraries are needed -// by input module indeed will be returned. This mask will be saved and used by -// SYCL runtime later. -//===----------------------------------------------------------------------===// - -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/TargetParser/Triple.h" - -#include -#include - -static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_"; - -using namespace llvm; -// We will gradually remove devicelib spv online linking path but keep -// bfloat16 devicelib spv as an exception for a short-term solution. -// For bfloat16 devicelib spv link, we won't rely on ReqMask but to embed -// the bits into executable if necessary -namespace { - -using SYCLDeviceLibFuncMap = std::unordered_map; - -// Please update SDLMap if any item is added to or removed from -// fallback device libraries in libdevice. 
-SYCLDeviceLibFuncMap SDLMap = { - {"__devicelib_abs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acosf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acoshf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_asinf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_asinhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atan2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atanf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atanhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_cbrtf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_cosf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_coshf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_div", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_erfcf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_erff", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_exp2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_expf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_expm1f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fdimf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fmaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fmodf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_frexpf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_hypotf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ilogbf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_labs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ldiv", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ldexpf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_lgammaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_llabs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_lldiv", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log10f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log1pf", 
DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_logbf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_logf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_modff", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_nextafterf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_powf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_remainderf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_remquof", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_scalbnf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sinf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sinhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sqrtf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tanf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tanhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tgammaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acos", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_acosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_asin", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_asinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atan", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atan2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cbrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cos", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_erf", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_erfc", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_exp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_exp2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_expm1", 
DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fdim", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fmod", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_frexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_hypot", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_ilogb", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_ldexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_lgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log10", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log1p", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_logb", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_modf", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_nextafter", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_pow", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_remainder", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_remquo", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_scalbn", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sin", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sqrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tan", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib___divsc3", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib___mulsc3", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cabsf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cacosf", 
DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cacoshf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cargf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_casinf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_casinhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_catanf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_catanhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ccosf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ccoshf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cexpf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cimagf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_clogf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cpolarf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cpowf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cprojf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_crealf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csinf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csinhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csqrtf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ctanf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ctanhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib___divdc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib___muldc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cabs", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cacos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cacosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_carg", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_casin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_casinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - 
{"__devicelib_catan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_catanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ccos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ccosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cexp", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cimag", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_clog", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cpolar", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cpow", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cproj", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_creal", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csqrt", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ctan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ctanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_memcpy", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_memset", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_memcmp", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, - {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, - {"__devicelib_imf_llmax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_llmin", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_max", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_min", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ullmax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ullmin", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umin", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_brev", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_brevll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_byte_perm", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ffs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ffsll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_clz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_clzll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_popc", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_urhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_hadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mul24", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umul24", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mulhi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umulhi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mul64hi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umul64hi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ceilf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_abs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_llabs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fabsf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_truncf", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_rintf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_nearbyintf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_invf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rsqrtf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_exp10f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_expf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_logf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_log2f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_log10f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_powf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_fdividef", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rn", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float_as_int", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_int2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int_as_float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float_as_uint", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint_as_float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_half2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half_as_short", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half_as_ushort", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_ull2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort_as_half", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short_as_half", DeviceLibExt::cl_intel_devicelib_imf}, - - {"__devicelib_imf_fmaf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_floorf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ceilf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fabsf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_truncf16", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_rintf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_nearbyintf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_invf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rsqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaxf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fminf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_copysignf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vneg2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vneg4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vnegss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vnegss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vadd2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vadd4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddus2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddus4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsub2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsub4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubss2", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubus2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubus4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vhaddu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vhaddu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpeq2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpeq4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpne2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpne4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpges2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpges4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgeu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgeu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgtu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgtu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmples2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmples4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpleu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpleu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmplts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmplts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpltu2", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_vcmpltu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmins2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmins4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vminu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vminu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vseteq2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vseteq4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetne2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetne4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetges2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetges4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgeu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgeu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgtu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgtu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetles2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetles4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetleu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetleu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetlts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetlts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetltu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetltu4", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsads2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsads4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsadu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsadu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s16x2_relu", - DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s16x2_relu", - DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_vimin3_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_double2half", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rcp64h", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rsqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, 
- {"__devicelib_imf_fmax", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fmin", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_copysign", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - 
{"__devicelib_imf_double2int_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2hiint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2loint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double_as_longlong", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_hiloint2double", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_int2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_ru", - 
DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_uint2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_longlong_as_double", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_bfloat162float", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - 
{"__devicelib_imf_bfloat162uint_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat16_as_short", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat16_as_ushort", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short_as_bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort_as_bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - 
{"__devicelib_imf_ushort2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fmabf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fmaxbf16", 
DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fminbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_copysignbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_sqrtbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_rsqrtbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fabsbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_rintbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_floorbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ceilbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_truncbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, -}; - -// Each fallback device library corresponds to one bit in "require mask" which -// is an unsigned int32. getDeviceLibBit checks which fallback device library -// is required for FuncName and returns the corresponding bit. The corresponding -// mask for each fallback device library is: -// cl_intel_devicelib_assert: 0x1 -// cl_intel_devicelib_math: 0x2 -// cl_intel_devicelib_math_fp64: 0x4 -// cl_intel_devicelib_complex: 0x8 -// cl_intel_devicelib_complex_fp64: 0x10 -// cl_intel_devicelib_cstring : 0x20 -// cl_intel_devicelib_imf: 0x40 -// cl_intel_devicelib_imf_fp64: 0x80 -// cl_intel_devicelib_imf_bf16: 0x100 -// cl_intel_devicelib_bfloat16: 0x200 -uint32_t getDeviceLibBits(const std::string &FuncName) { - auto DeviceLibFuncIter = SDLMap.find(FuncName); - return ((DeviceLibFuncIter == SDLMap.end()) - ? 0 - : 0x1 << (static_cast(DeviceLibFuncIter->second) - - static_cast( - DeviceLibExt::cl_intel_devicelib_assert))); -} - -} // namespace - -// For each device image module, we go through all functions which meets -// 1. The function name has prefix "__devicelib_" -// 2. The function is declaration which means it doesn't have function body -// And we don't expect non-spirv functions with "__devicelib_" prefix. 
-uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { - // Device libraries will be enabled only for spir-v module. - if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return 0; - uint32_t ReqMask = 0; - for (const Function &SF : M) { - if (SF.getName().starts_with(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { - assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); - uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); - ReqMask |= DeviceLibBits; - } - } - return ReqMask; -} - -static llvm::SmallVector BF16DeviceLibFuncs = { - "__devicelib_ConvertFToBF16INTEL", - "__devicelib_ConvertBF16ToFINTEL", - "__devicelib_ConvertFToBF16INTELVec1", - "__devicelib_ConvertBF16ToFINTELVec1", - "__devicelib_ConvertFToBF16INTELVec2", - "__devicelib_ConvertBF16ToFINTELVec2", - "__devicelib_ConvertFToBF16INTELVec3", - "__devicelib_ConvertBF16ToFINTELVec3", - "__devicelib_ConvertFToBF16INTELVec4", - "__devicelib_ConvertBF16ToFINTELVec4", - "__devicelib_ConvertFToBF16INTELVec8", - "__devicelib_ConvertBF16ToFINTELVec8", - "__devicelib_ConvertFToBF16INTELVec16", - "__devicelib_ConvertBF16ToFINTELVec16", -}; - -bool llvm::isSYCLDeviceLibBF16Used(const Module &M) { - if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return false; - - for (auto Fn : BF16DeviceLibFuncs) { - Function *BF16Func = M.getFunction(Fn); - if (BF16Func && BF16Func->isDeclaration()) - return true; - } - - return false; -} - -bool llvm::isBF16DeviceLibFuncDecl(const Function &F) { - if (!F.isDeclaration() || !F.getName().starts_with(DEVICELIB_FUNC_PREFIX)) - return false; - for (auto BFunc : BF16DeviceLibFuncs) { - if (!F.getName().compare(BFunc)) - return true; - } - - return false; -} diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp index 07afffbecf552..f3172adc7769e 100644 --- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp @@ -16,7 +16,6 @@ #include 
"llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" @@ -123,11 +122,6 @@ PropSetRegTy computeModuleProperties(const Module &M, bool AllowDeviceImageDependencies) { PropSetRegTy PropSet; - { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); - } { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); diff --git a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp index 33b4dfb26d740..4c0bf9523d2cb 100644 --- a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp +++ b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp @@ -27,7 +27,7 @@ #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h" #include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SanitizerPostOptimizer.h" diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index b562c67ff1eb3..2d18edb2ef921 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -195,7 +195,6 @@ PropertyValue &PropertyValue::operator=(const PropertyValue &P) { } constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; -constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff 
--git a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll b/llvm/test/tools/sycl-post-link/device-requirements/mask.ll deleted file mode 100644 index 31b393249bf5f..0000000000000 --- a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.prop - -; CHECK:[SYCL/devicelib req mask] -; CHECK: DeviceLibReqMask=1|64 - -source_filename = "main.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spirv64-unknown-unknown" - -declare spir_func i32 @__devicelib_imf_umulhi(i32 noundef %0, i32 noundef %1) - -; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @kernel() #0 { -entry: - %0 = call i32 @__devicelib_imf_umulhi(i32 0, i32 0) - ret void -} - -attributes #0 = { "sycl-module-id"="main.cpp" } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 1d7d6686e1ad7..8f21122f9e996 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -33,6 +33,7 @@ #include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h" #include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" diff --git a/llvm/utils/git/requirements.txt b/llvm/utils/git/requirements.txt index 7340d7fccbd4a..3834b1fefbd22 100644 --- a/llvm/utils/git/requirements.txt +++ b/llvm/utils/git/requirements.txt @@ -258,9 +258,9 @@ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # 
via pygithub -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + --hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via # pygithub # requests diff --git a/llvm/utils/git/requirements_formatting.txt b/llvm/utils/git/requirements_formatting.txt index 2bc39effc7642..85a8bfd53ad15 100644 --- a/llvm/utils/git/requirements_formatting.txt +++ b/llvm/utils/git/requirements_formatting.txt @@ -298,9 +298,9 @@ toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f # via darker -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + --hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via requests wrapt==1.16.0 \ --hash=sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc \ diff --git a/llvm/utils/git/requirements_linting.txt b/llvm/utils/git/requirements_linting.txt index b985b80aa869e..62dc0d95a7e32 100644 --- a/llvm/utils/git/requirements_linting.txt +++ b/llvm/utils/git/requirements_linting.txt @@ -235,9 +235,9 @@ requests==2.32.5 \ --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \ --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf # via pygithub -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + 
--hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via requests wrapt==1.17.3 \ --hash=sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56 \ diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index e8e5ab4e93bf2..30f39f5c5bc5d 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 0b02c3e25a4f7..9b0d925850f96 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -203,7 +203,6 @@ variables in production code. | `SYCL_PRINT_EXECUTION_GRAPH` | Described [below](#sycl_print_execution_graph-options) | Print execution graph to DOT text file. | | `SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP` | Any(\*) | Disable regular cleanup of enqueued (or finished, in case of host tasks) non-leaf command nodes. If disabled, command nodes will be cleaned up only during the destruction of the last remaining memory object used by them. | | `SYCL_DISABLE_POST_ENQUEUE_CLEANUP` (deprecated) | Any(\*) | Use `SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP` instead. | -| `SYCL_DEVICELIB_INHIBIT_NATIVE` | String of device library extensions (separated by a whitespace) | Do not rely on device native support for devicelib extensions listed in this option. | | `SYCL_PROGRAM_COMPILE_OPTIONS` | String of valid compile options | Override compile options for all programs. | | `SYCL_PROGRAM_LINK_OPTIONS` | String of valid link options | Override link options for all programs. | | `SYCL_PROGRAM_APPEND_COMPILE_OPTIONS` | String of valid compile options | Append to the end of compile options for all programs. 
| diff --git a/sycl/doc/design/PropertySets.md b/sycl/doc/design/PropertySets.md index 4cfdb552d6622..9c51b7ea86915 100644 --- a/sycl/doc/design/PropertySets.md +++ b/sycl/doc/design/PropertySets.md @@ -72,20 +72,6 @@ constant. See also [SYCL2020-SpecializationConstants.md](./SYCL2020-SpecializationConstants.md). - -### [SYCL/devicelib req mask] - -__Key:__ At most one entry with "DeviceLibReqMask". - -__Value type:__ 32 bit integer. ("1") - -__Value:__ A bitmask of which device libraries the binary uses. - -__Notes:__ - -1. If this property set is missing, no device libraries are used by the binary. - - ### [SYCL/kernel param opt] __Key:__ Kernel name. diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp index 0978fa4b8eb23..0bc4872f76641 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp @@ -42,7 +42,8 @@ enum class node_type { host_task = 9, native_command = 10, async_malloc = 11, - async_free = 12 + async_free = 12, + host_sync = 13 }; /// Class representing a node in the graph, returned by command_graph::add(). diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index dd510f70fa4c6..0d833dbe2279a 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -44,9 +44,6 @@ /// PropertySetIO.h #define __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP \ "SYCL/specialization constants default values" -/// TODO: remove req mask when sycl devicelib online linking path is removed. 
-/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h -#define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" /// PropertySetRegistry::SYCL_DEVICELIB_METADATA defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_DEVICELIB_METADATA "SYCL/devicelib metadata" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 053597fbc857e..7d032b4ef2e47 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -133,7 +133,6 @@ context_impl::~context_impl() { detail::ProgramManager::getInstance() .getProfileCounterDeviceGlobalEntries(this)) DGEntry->cleanupProfileCounter(this); - MCachedLibPrograms.clear(); // TODO catch an exception and put it to list of asynchronous exceptions getAdapter().call_nocheck(MContext); } catch (std::exception &e) { diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 3b42a6319d223..34e9e394d4649 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -130,27 +130,6 @@ class context_impl : public std::enable_shared_from_this { devices_range getDevices() const { return MDevices; } - using CachedLibProgramsT = - std::map, - Managed>; - - /// In contrast to user programs, which are compiled from user code, library - /// programs come from the SYCL runtime. They are identified by the - /// corresponding extension: - /// - /// cl_intel_devicelib_assert -> # - /// cl_intel_devicelib_complex -> # - /// etc. - /// - /// See `doc/design/DeviceLibExtensions.rst' for - /// more details. - /// - /// \returns an instance of sycl::detail::Locked which wraps a map with device - /// library programs and the corresponding lock for synchronized access. 
- Locked acquireCachedLibPrograms() { - return {MCachedLibPrograms, MCachedLibProgramsMutex}; - } - KernelProgramCache &getKernelProgramCache() const { return MKernelProgramCache; } @@ -266,8 +245,6 @@ class context_impl : public std::enable_shared_from_this { ur_context_handle_t MContext; platform_impl &MPlatform; property_list MPropList; - CachedLibProgramsT MCachedLibPrograms; - std::mutex MCachedLibProgramsMutex; mutable KernelProgramCache MKernelProgramCache; mutable PropertySupport MSupportBufferLocationByDevices; diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 6e2f0baa4f369..4b3ef86dd1a50 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -188,7 +188,6 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); - DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); DeviceLibMetadata.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); ImplicitLocalArg.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG); @@ -551,10 +550,6 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( &MergedExportedSymbols, &MergedRegisteredKernels}; // Exclusive merges. 
- auto MergedDeviceLibReqMask = - exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { - return Img.getDeviceLibReqMask(); - }); auto MergedProgramMetadata = exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getProgramMetadata(); @@ -571,9 +566,8 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( std::array *, - 4> - MergedMaps{&MergedDeviceLibReqMask, &MergedProgramMetadata, - &MergedImportedSymbols, &MergedMisc}; + 3> + MergedMaps{&MergedProgramMetadata, &MergedImportedSymbols, &MergedMisc}; // When merging exported and imported, the exported symbols may cancel out // some of the imported symbols. @@ -676,7 +670,6 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); - CopyPropertiesMap(MergedDeviceLibReqMask, DeviceLibReqMask); CopyPropertiesMap(MergedProgramMetadata, ProgramMetadata); CopyPropertiesMap(MergedImportedSymbols, ImportedSymbols); CopyPropertiesMap(MergedMisc, Misc); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 2cd380c91bd65..b4818017268ea 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -216,7 +216,6 @@ class RTDeviceBinaryImage { const PropertyRange &getSpecConstantsDefaultValues() const { return SpecConstDefaultValuesMap; } - const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } const PropertyRange &getDeviceLibMetadata() const { return DeviceLibMetadata; } @@ -255,7 +254,6 @@ class RTDeviceBinaryImage { ur::DeviceBinaryType Format = SYCL_DEVICE_BINARY_TYPE_NONE; RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; - RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; RTDeviceBinaryImage::PropertyRange DeviceLibMetadata; RTDeviceBinaryImage::PropertyRange 
KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6b7957bf781bd..7c4822d9fc5bd 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -272,10 +272,30 @@ void event_impl::wait(bool *Success) { throw sycl::exception(make_error_code(errc::invalid), "wait method cannot be used for a discarded event."); + printf("FOOBAR3 %d\n", MGraph.expired()); + + if (!MGraph.expired()) { - throw sycl::exception(make_error_code(errc::invalid), - "wait method cannot be used for an event associated " - "with a command graph."); + auto GraphImpl = MGraph.lock(); + + // Add a host sync node to the graph to create a partition point. + // TODO: test if partitioned wait bits are set + if (GraphImpl) { + auto EmptyCG = std::make_shared( + detail::CGType::None, + detail::CG::StorageInitHelper{} + ); + + std::vector EmptyDeps; + ext::oneapi::experimental::detail::node_impl &HostSyncNode = GraphImpl->add( + ext::oneapi::experimental::node_type::host_sync, + EmptyCG, + EmptyDeps + ); + + printf("FOOBAR2\n"); + + } } #ifdef XPTI_ENABLE_INSTRUMENTATION diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 213ddc14b63a9..9aab0f1c166f0 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -18,6 +18,7 @@ #include // for kernel_impl #include // ProgramManager #include // for queue_impl +#include // for debug output #include // for SYCLMemObjT #include // for stack #include // for tls_code_loc_t etc.. @@ -68,6 +69,8 @@ inline const char *nodeTypeToString(node_type NodeType) { return "async_malloc"; case node_type::async_free: return "async_free"; + case node_type::host_sync: + return "host_sync"; } assert(false && "Unhandled node type"); return {}; @@ -140,19 +143,19 @@ void propagatePartitionUp(node_impl &Node, int PartitionNum) { /// remain. 
/// @param Node Node to assign to the partition. /// @param PartitionNum Number to propagate. -/// @param HostTaskList List of host tasks that have already been processed and +/// @param CutVertexList List of tasks that have already been processed and /// are encountered as successors to the node Node. void propagatePartitionDown(node_impl &Node, int PartitionNum, - std::list &HostTaskList) { + std::list &CutVertexList) { if (Node.MCGType == sycl::detail::CGType::CodeplayHostTask) { if (Node.MPartitionNum != -1) { - HostTaskList.push_front(&Node); + CutVertexList.push_front(&Node); } return; } Node.MPartitionNum = PartitionNum; for (node_impl &Successor : Node.successors()) { - propagatePartitionDown(Successor, PartitionNum, HostTaskList); + propagatePartitionDown(Successor, PartitionNum, CutVertexList); } } @@ -180,24 +183,50 @@ void partition::updateSchedule() { void exec_graph_impl::makePartitions() { int CurrentPartition = -1; - std::list HostTaskList; + std::list CutVertexList; + +#define SYCL_GRAPH_DEBUG 1 +#ifdef SYCL_GRAPH_DEBUG + // Debug: Print total number of nodes + std::cout << "[DEBUG] makePartitions: Starting with " << MNodeStorage.size() << " nodes" << std::endl; + + // Debug: Print all nodes and their types + int nodeIndex = 0; + for (node_impl &Node : nodes()) { + std::cout << "[DEBUG] Node " << nodeIndex << ": Type=" << nodeTypeToString(Node.MNodeType) + << ", CGType=" << static_cast(Node.MCGType) << std::endl; + nodeIndex++; + } +#endif + + // A cut vertex is a node that, when removed, increases the number of connected components + // in the graph. 
In our case, cut vertices are host-tasks / sync tasks that separate partitions + auto const IsCutVertex = [](node_impl const& node) { + return node.MCGType == sycl::detail::CGType::CodeplayHostTask || + node.MNodeType == node_type::host_sync; + }; + // find all the host-tasks in the graph for (node_impl &Node : nodes()) { - if (Node.MCGType == sycl::detail::CGType::CodeplayHostTask) { - HostTaskList.push_back(&Node); + if (IsCutVertex(Node)) { + CutVertexList.push_back(&Node); } } - MContainsHostTask = HostTaskList.size() > 0; + MContainsHostTask = CutVertexList.size() > 0; +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Found " << CutVertexList.size() << " host tasks, MContainsHostTask=" + << (MContainsHostTask ? "true" : "false") << std::endl; +#endif // Annotate nodes // The first step in graph partitioning is to annotate all nodes of the graph // with a temporary partition or group number. This step allows us to group // the graph nodes into sets of nodes with kind of meta-dependencies that must // be enforced by the runtime. For example, Group 2 depends on Groups 0 and 1, // which means that we should not try to run Group 2 before Groups 0 and 1 - // have finished executing. Since host-tasks are currently the only tasks that + // have finished executing. Since host-tasks and sync-tasks are the only tasks that // require runtime dependency handling, groups of nodes are created from - // host-task nodes. We therefore loop over all the host-task nodes, and for + // these nodes. We therefore loop over all the host-task and sync-task nodes, and for // each node: // - Its predecessors are assigned to group number `n-1` // - The node itself constitutes a group, group number `n` @@ -213,23 +242,32 @@ void exec_graph_impl::makePartitions() { // case, the host-task node `A` must be reprocessed after the node `B` and the // group that includes the predecessor of `B` can be merged with the group of // the predecessors of the node `A`. 
- while (HostTaskList.size() > 0) { - node_impl &Node = *HostTaskList.front(); - HostTaskList.pop_front(); + while (CutVertexList.size() > 0) { + node_impl &Node = *CutVertexList.front(); + CutVertexList.pop_front(); +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Processing host task node, CurrentPartition=" << CurrentPartition << std::endl; +#endif CurrentPartition++; for (node_impl &Predecessor : Node.predecessors()) { propagatePartitionUp(Predecessor, CurrentPartition); } CurrentPartition++; Node.MPartitionNum = CurrentPartition; +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Assigned host task to partition " << CurrentPartition << std::endl; +#endif CurrentPartition++; - auto TmpSize = HostTaskList.size(); + auto TmpSize = CutVertexList.size(); for (node_impl &Successor : Node.successors()) { - propagatePartitionDown(Successor, CurrentPartition, HostTaskList); + propagatePartitionDown(Successor, CurrentPartition, CutVertexList); } - if (HostTaskList.size() > TmpSize) { + if (CutVertexList.size() > TmpSize) { +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Host task list size increased, merging partitions" << std::endl; +#endif // At least one HostTask has been re-numbered so group merge opportunities - for (node_impl *HT : HostTaskList) { + for (node_impl *HT : CutVertexList) { auto HTPartitionNum = HT->MPartitionNum; if (HTPartitionNum != -1) { // can merge predecessors of node `Node` with predecessors of node @@ -246,14 +284,32 @@ void exec_graph_impl::makePartitions() { } } +#ifdef SYCL_GRAPH_DEBUG + // Debug: Print node partition assignments before creating partitions + std::cout << "[DEBUG] Node partition assignments:" << std::endl; + nodeIndex = 0; + for (node_impl &Node : nodes()) { + std::cout << "[DEBUG] Node " << nodeIndex << ": Partition=" << Node.MPartitionNum + << ", Type=" << nodeTypeToString(Node.MNodeType) << std::endl; + nodeIndex++; + } +#endif + // Create partitions int PartitionFinalNum = 0; +#ifdef SYCL_GRAPH_DEBUG + std::cout << 
"[DEBUG] Creating partitions from " << -1 << " to " << CurrentPartition << std::endl; +#endif for (int i = -1; i <= CurrentPartition; i++) { const std::shared_ptr &Partition = std::make_shared(); + int nodesInPartition = 0; + int rootsInPartition = 0; for (node_impl &Node : nodes()) { if (Node.MPartitionNum == i) { + nodesInPartition++; MPartitionNodes[&Node] = PartitionFinalNum; if (isPartitionRoot(Node)) { + rootsInPartition++; Partition->MRoots.insert(&Node); if (Node.MCGType == CGType::CodeplayHostTask) { Partition->MIsHostTask = true; @@ -261,21 +317,37 @@ void exec_graph_impl::makePartitions() { } } } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Partition " << i << ": " << nodesInPartition << " nodes, " + << rootsInPartition << " roots" << std::endl; +#endif if (Partition->MRoots.size() > 0) { Partition->updateSchedule(); Partition->MIsInOrderGraph = Partition->checkIfGraphIsSinglePath(); MPartitions.push_back(Partition); MRootPartitions.push_back(Partition); +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Added partition " << PartitionFinalNum << " (original " << i + << "), IsHostTask=" << (Partition->MIsHostTask ? "true" : "false") + << ", IsInOrder=" << (Partition->MIsInOrderGraph ? "true" : "false") << std::endl; +#endif PartitionFinalNum++; } } // Add an empty partition if there is no partition, i.e. 
empty graph if (MPartitions.empty()) { +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] No partitions created, adding empty partition" << std::endl; +#endif MPartitions.push_back(std::make_shared()); MRootPartitions.push_back(MPartitions[0]); } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Final result: " << MPartitions.size() << " partitions created" << std::endl; +#endif + // Make global schedule list for (const auto &Partition : MPartitions) { MSchedule.insert(MSchedule.end(), Partition->MSchedule.begin(), @@ -283,22 +355,39 @@ void exec_graph_impl::makePartitions() { } // Compute partition dependencies + int partitionIdx = 0; for (const auto &Partition : MPartitions) { + int predecessorCount = 0; + int successorCount = 0; for (node_impl &Root : Partition->roots()) { for (node_impl &NodeDep : Root.predecessors()) { auto &Predecessor = MPartitions[MPartitionNodes[&NodeDep]]; Partition->MPredecessors.push_back(Predecessor.get()); Predecessor->MSuccessors.push_back(Partition.get()); + predecessorCount++; } } + for (auto &Succ : Partition->MSuccessors) { + successorCount++; + } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Partition " << partitionIdx << " dependencies: " + << predecessorCount << " predecessors, " << successorCount << " successors" << std::endl; +#endif + partitionIdx++; } // Reset node groups (if node have to be re-processed - e.g. 
subgraph) for (node_impl &Node : nodes()) { Node.MPartitionNum = -1; } + +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] makePartitions completed" << std::endl; +#endif } + graph_impl::graph_impl(const sycl::context &SyclContext, const sycl::device &SyclDevice, const sycl::property_list &PropList) @@ -699,6 +788,7 @@ void graph_impl::beginRecordingUnlockedQueue(sycl::detail::queue_impl &Queue) { void graph_impl::beginRecording(sycl::detail::queue_impl &Queue) { graph_impl::WriteLock Lock(MMutex); + printf("Graph %p beginRecording on Queue %p\n", this, &Queue); if (!Queue.hasCommandGraph()) { Queue.setCommandGraph(shared_from_this()); addQueue(Queue); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8abbe0fdc261f..ba59d2cbb7a5a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -914,19 +914,6 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, NativePrg, Adapter); } - // Link a fallback implementation of device libraries if they are not - // supported by a device compiler. - // Pre-compiled programs (after AOT compilation or read from persitent - // cache) are supposed to be already linked. - // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means - // no fallback device library will be linked. - uint32_t DeviceLibReqMask = 0; - bool UseDeviceLibs = !DeviceCodeWasInCache && - MainImg.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && - !SYCLConfig::get(); - if (UseDeviceLibs) - DeviceLibReqMask = getDeviceLibReqMask(MainImg); - std::vector> ProgramsToLink; // If we had a program in cache, then it should have been the fully linked // program already. @@ -936,8 +923,6 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, // Oth image is the main one and has been handled, skip it. 
for (std::size_t I = 1; I < ImgWithDeps.getAll().size(); ++I) { const RTDeviceBinaryImage *BinImg = ImgWithDeps.getAll()[I]; - if (UseDeviceLibs) - DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); Managed NativePrg = createURProgram(*BinImg, ContextImpl, Devs); @@ -957,7 +942,7 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, Managed BuiltProgram = build(std::move(NativePrg), ContextImpl, CompileOpts, LinkOpts, - URDevices, DeviceLibReqMask, ProgramsToLink, + URDevices, ProgramsToLink, /*CreatedFromBinary*/ MainImg.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); @@ -1204,91 +1189,6 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, return Log; } -// TODO device libraries may use scpecialization constants, manifest files, etc. -// To support that they need to be delivered in a different container - so that -// sycl_device_binary_struct can be created for each of them. -static Managed loadDeviceLib(context_impl &Context, - const char *Name) { - std::string LibSyclDir = OSUtil::getCurrentDSODir(); - std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, - std::ifstream::in | std::ifstream::binary); - if (!File.good()) { - return {}; - } - - File.seekg(0, std::ios::end); - size_t FileSize = File.tellg(); - File.seekg(0, std::ios::beg); - std::vector FileContent(FileSize); - File.read(&FileContent[0], FileSize); - File.close(); - - return createSpirvProgram(Context, (unsigned char *)&FileContent[0], - FileSize); -} - -// For each extension, a pair of library names. The first uses native support, -// the second emulates functionality in software. 
-static const std::map> - DeviceLibNames = { - {DeviceLibExt::cl_intel_devicelib_assert, - {nullptr, "libsycl-fallback-cassert.spv"}}, - {DeviceLibExt::cl_intel_devicelib_math, - {nullptr, "libsycl-fallback-cmath.spv"}}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - {nullptr, "libsycl-fallback-cmath-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_complex, - {nullptr, "libsycl-fallback-complex.spv"}}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - {nullptr, "libsycl-fallback-complex-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_cstring, - {nullptr, "libsycl-fallback-cstring.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf, - {nullptr, "libsycl-fallback-imf.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, - {nullptr, "libsycl-fallback-imf-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, - {nullptr, "libsycl-fallback-imf-bf16.spv"}}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}}; - -static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) { - auto LibPair = DeviceLibNames.find(Extension); - const char *Lib = nullptr; - if (LibPair != DeviceLibNames.end()) - Lib = Native ? LibPair->second.first : LibPair->second.second; - if (Lib == nullptr) - throw exception(make_error_code(errc::build), - "Unhandled (new?) device library extension"); - return Lib; -} - -// For each extension understood by the SYCL runtime, the string representation -// of its name. Names with devicelib in them are internal to the runtime. Others -// are actual OpenCL extensions. 
-static const std::map DeviceLibExtensionStrs = { - {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, - {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - "cl_intel_devicelib_math_fp64"}, - {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "cl_intel_devicelib_complex_fp64"}, - {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, - {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - "cl_intel_bfloat16_conversions"}}; - -static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { - auto Ext = DeviceLibExtensionStrs.find(Extension); - if (Ext == DeviceLibExtensionStrs.end()) - throw exception(make_error_code(errc::build), - "Unhandled (new?) device library extension"); - return Ext->second; -} - static ur_result_t doCompile(adapter_impl &Adapter, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, const char *Opts) { @@ -1303,88 +1203,6 @@ static ur_result_t doCompile(adapter_impl &Adapter, ur_program_handle_t Program, return Result; } -static ur_program_handle_t -loadDeviceLibFallback(context_impl &Context, DeviceLibExt Extension, - std::vector &Devices, - bool UseNativeLib) { - - auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); - auto LockedCache = Context.acquireCachedLibPrograms(); - auto &CachedLibPrograms = LockedCache.get(); - // Collect list of devices to compile the library for. Library was already - // compiled for a device if there is a corresponding record in the per-context - // cache. 
- std::vector DevicesToCompile; - Managed *UrProgram = nullptr; - assert(Devices.size() > 0 && - "At least one device is expected in the input vector"); - // Vector of devices that don't have the library cached. - for (ur_device_handle_t Dev : Devices) { - auto [It, Inserted] = CachedLibPrograms.emplace( - std::make_pair(Extension, Dev), Managed{}); - if (!Inserted) { - Managed &CachedUrProgram = It->second; - assert(CachedUrProgram && "If device lib UR program was cached then is " - "expected to be not a nullptr"); - assert(!UrProgram || *UrProgram == CachedUrProgram); - // Managed::operator& is overloaded, use - // `std::addressof`: - UrProgram = std::addressof(CachedUrProgram); - } else { - DevicesToCompile.push_back(Dev); - } - } - - if (DevicesToCompile.empty()) - return *UrProgram; - - auto EraseProgramForDevices = [&]() { - for (auto Dev : DevicesToCompile) - CachedLibPrograms.erase(std::make_pair(Extension, Dev)); - }; - - Managed NewlyCreated; - // Create UR program for device lib if we don't have it yet. - if (!UrProgram) { - NewlyCreated = loadDeviceLib(Context, LibFileName); - if (NewlyCreated == nullptr) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); - } - } - - // Insert UrProgram into the cache for all devices that we will compile for. - for (auto Dev : DevicesToCompile) { - Managed &Cached = - CachedLibPrograms[std::make_pair(Extension, Dev)]; - if (NewlyCreated) { - Cached = std::move(NewlyCreated); - UrProgram = std::addressof(Cached); - } else { - Cached = UrProgram->retain(); - } - } - - adapter_impl &Adapter = Context.getAdapter(); - // TODO no spec constants are used in the std libraries, support in the future - // Do not use compile options for library programs: it is not clear if user - // options (image options) are supposed to be applied to library program as - // well, and what actually happens to a SPIR-V program if we apply them. 
- ur_result_t Error = - doCompile(Adapter, *UrProgram, DevicesToCompile.size(), - DevicesToCompile.data(), Context.getHandleRef(), ""); - if (Error != UR_RESULT_SUCCESS) { - EraseProgramForDevices(); - throw detail::set_ur_error( - exception(make_error_code(errc::build), - ProgramManager::getProgramBuildLog(*UrProgram, Context)), - Error); - } - - return *UrProgram; -} - ProgramManager::ProgramManager() : m_SanitizerFoundInImage(SanitizerType::None) { const char *SpvFile = std::getenv(UseSpvEnv); @@ -1581,95 +1399,6 @@ const RTDeviceBinaryImage &ProgramManager::getDeviceImage( return **ImageIterator; } -static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { - uint32_t Mask = - 0x1 << (static_cast(Ext) - - static_cast(DeviceLibExt::cl_intel_devicelib_assert)); - return ((DeviceLibReqMask & Mask) == Mask); -} - -static std::vector -getDeviceLibPrograms(context_impl &Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { - std::vector Programs; - - std::pair RequiredDeviceLibExt[] = { - {DeviceLibExt::cl_intel_devicelib_assert, - /* is fallback loaded? */ false}, - {DeviceLibExt::cl_intel_devicelib_math, false}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_complex, false}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_cstring, false}, - {DeviceLibExt::cl_intel_devicelib_imf, false}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; - - // Disable all devicelib extensions requiring fp64 support if at least - // one underlying device doesn't support cl_khr_fp64. 
- const bool fp64Support = std::all_of( - Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) { - return Context.getPlatformImpl().getDeviceImpl(Device)->has_extension( - "cl_khr_fp64"); - }); - - // Load a fallback library for an extension if the any device does not - // support it. - for (auto Device : Devices) { - // TODO: device_impl::has_extension should cache extension string, then we'd - // be able to use that in the loop below directly. - std::string DevExtList = urGetInfoString( - *Context.getPlatformImpl().getDeviceImpl(Device), - UR_DEVICE_INFO_EXTENSIONS); - - for (auto &Pair : RequiredDeviceLibExt) { - DeviceLibExt Ext = Pair.first; - bool &FallbackIsLoaded = Pair.second; - - if (FallbackIsLoaded) { - continue; - } - - if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { - continue; - } - - // Skip loading the fallback library that requires fp64 support if any - // device in the list doesn't support fp64. - if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_imf_fp64) && - !fp64Support) { - continue; - } - - auto ExtName = getDeviceLibExtensionStr(Ext); - - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtName) != nullptr; - } - - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName); - if (!DeviceSupports || InhibitNativeImpl) { - Programs.push_back(loadDeviceLibFallback(Context, Ext, Devices, - /*UseNativeLib=*/false)); - FallbackIsLoaded = true; - } else { - // bfloat16 needs native library if device supports it - if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) { - Programs.push_back(loadDeviceLibFallback(Context, Ext, Devices, - /*UseNativeLib=*/true)); - FallbackIsLoaded = true; - } - } - } - } - return Programs; -} - // Check if device image is compressed. 
static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1680,7 +1409,7 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { Managed ProgramManager::build( Managed Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, uint32_t DeviceLibReqMask, + std::vector &Devices, const std::vector> &ExtraProgramsToLink, bool CreatedFromBinary) { @@ -1688,30 +1417,18 @@ Managed ProgramManager::build( std::cerr << ">>> ProgramManager::build(" << static_cast(Program) << ", " << CompileOptions << ", " << LinkOptions << ", " - << VecToString(Devices) << ", " << std::hex << DeviceLibReqMask - << std::dec << ", " << VecToString(ExtraProgramsToLink) << ", " - << CreatedFromBinary << ")\n"; + << VecToString(Devices) << ", " + << VecToString(ExtraProgramsToLink) << ", " << CreatedFromBinary + << ")\n"; } - bool LinkDeviceLibs = (DeviceLibReqMask != 0); - - // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. - // We do not link with other device libraries, because it may fail - // due to unrecognized SPIR-V format of those libraries. - if (CompileOptions.find(std::string("-cmc")) != std::string::npos || - CompileOptions.find(std::string("-vc-codegen")) != std::string::npos) - LinkDeviceLibs = false; - std::vector LinkPrograms; - if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); - } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1'); adapter_impl &Adapter = Context.getAdapter(); - if (LinkPrograms.empty() && ExtraProgramsToLink.empty() && !ForceLink) { + if (ExtraProgramsToLink.empty() && !ForceLink) { const std::string &Options = LinkOptions.empty() ? 
CompileOptions : (CompileOptions + " " + LinkOptions); @@ -2264,15 +1981,6 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, F.close(); } -uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { - const RTDeviceBinaryImage::PropertyRange &DLMRange = - Img.getDeviceLibReqMask(); - if (DLMRange.isAvailable()) - return DeviceBinaryProperty(*(DLMRange.begin())).asUint32(); - else - return 0x0; -} - const KernelArgMask * ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, std::string_view KernelName) { @@ -3277,10 +2985,8 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( // No linking of extra programs reqruired. std::vector> ExtraProgramsToLink; std::vector Devs = {DeviceImpl.getHandleRef()}; - auto BuildProgram = - build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, Devs, - /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, - ExtraProgramsToLink); + auto BuildProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, + LinkOpts, Devs, ExtraProgramsToLink); Managed UrKernel{Adapter}; Adapter.call( BuildProgram, KernelName.data(), &UrKernel); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c7a1a0aafb854..c279fe4934830 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -77,21 +77,6 @@ class devices_range; class queue_impl; class event_impl; class device_images_range; -// DeviceLibExt is shared between sycl runtime and sycl-post-link tool. 
-// If any update is made here, need to sync with DeviceLibExt definition -// in llvm/tools/sycl-post-link/sycl-post-link.cpp -enum class DeviceLibExt : std::uint32_t { - cl_intel_devicelib_assert, - cl_intel_devicelib_math, - cl_intel_devicelib_math_fp64, - cl_intel_devicelib_complex, - cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring, - cl_intel_devicelib_imf, - cl_intel_devicelib_imf_fp64, - cl_intel_devicelib_imf_bf16, - cl_intel_devicelib_bfloat16, -}; enum class SanitizerType { None, @@ -227,8 +212,6 @@ class ProgramManager { static std::string getProgramBuildLog(const ur_program_handle_t &Program, context_impl &Context); - uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); - /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. /// \param NativePrg the UR program associated with the kernel. @@ -430,7 +413,7 @@ class ProgramManager { Managed build(Managed Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, uint32_t DeviceLibReqMask, + std::vector &Devices, const std::vector> &ProgramsToLink, bool CreatedFromBinary = false); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 50fd63b05b291..d4ba46206f12f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -888,11 +891,31 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); } #endif + printf("FOOBAR4 %d\n", MGraph.expired()); + if (!MGraph.expired()) { - throw sycl::exception(make_error_code(errc::invalid), - "wait cannot be called for a queue which is " - "recording to a command graph."); + auto GraphImpl = MGraph.lock(); + + // TODO: test if partitioned wait bits are set + if (GraphImpl) { + + auto EmptyCG = 
std::make_shared( + detail::CGType::None, + detail::CG::StorageInitHelper{}, + CodeLoc + ); + + printf("FOOBAR1\n"); + + std::vector EmptyDeps; + GraphImpl->add( + ext::oneapi::experimental::node_type::host_sync, + EmptyCG, + EmptyDeps + ); + } + return; } // If there is an external event set, we know we are using an in-order queue diff --git a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp new file mode 100644 index 0000000000000..695e7a3fd8ce0 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp @@ -0,0 +1,129 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests partitioned wait feature in SYCL Graph. + +#include "../graph_common.hpp" + +#include + +int main() { + property_list Properties{property::queue::in_order{}}; + queue Queue{Properties}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 100; + int *A = malloc_device(N, Queue); + int *B = malloc_device(N, Queue); + int *C = malloc_device(N, Queue); + int *D = malloc_device(N, Queue); + + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + A[it] = static_cast(it); + B[it] = 0; + C[it] = 0; + D[it] = 0; + }); + }).wait(); + + // Begin recording the graph + Graph.begin_recording(Queue); + + // Part 1: "Before" subgraph operations + auto Event1 = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + B[it] = A[it] * 2; + }); + }); + + auto Event2 = Queue.submit([&](handler &CGH) { + CGH.depends_on(Event1); + CGH.parallel_for(N, [=](id<1> it) { + C[it] = B[it] + 1; + }); + }); + + // should create a dummy barrier node in the graph + Queue.wait(); + + // Part 2: "After" subgraph operations + auto Event3 = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, 
[=](id<1> it) { + D[it] = C[it] * 3; + }); + }); + + Queue.wait(); + + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + D[it] = D[it] + A[it]; + }); + }); + + Graph.end_recording(); + + auto ExecGraph = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + Queue.wait_and_throw(); + + // Verify results + std::vector OutputA(N), OutputB(N), OutputC(N), OutputD(N); + Queue.memcpy(OutputA.data(), A, N * sizeof(int)).wait(); + Queue.memcpy(OutputB.data(), B, N * sizeof(int)).wait(); + Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); + Queue.memcpy(OutputD.data(), D, N * sizeof(int)).wait(); + + for (size_t i = 0; i < N; i++) { + int expected_a = static_cast(i); + int expected_b = expected_a * 2; + int expected_c = expected_b + 1; + int expected_d = expected_c * 3 + expected_a; + + assert(check_value(i, expected_a, OutputA[i], "A")); + assert(check_value(i, expected_b, OutputB[i], "B")); + assert(check_value(i, expected_c, OutputC[i], "C")); + assert(check_value(i, expected_d, OutputD[i], "D")); + } + + // Reset data and verify with new input + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + A[it] = static_cast(it) + 10; // Different input + B[it] = 0; + C[it] = 0; + D[it] = 0; + }); + }).wait(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + Queue.wait_and_throw(); + + Queue.memcpy(OutputA.data(), A, N * sizeof(int)).wait(); + Queue.memcpy(OutputB.data(), B, N * sizeof(int)).wait(); + Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); + Queue.memcpy(OutputD.data(), D, N * sizeof(int)).wait(); + + for (size_t i = 0; i < N; i++) { + int expected_a = static_cast(i) + 10; + int expected_b = expected_a * 2; + int expected_c = expected_b + 1; + int expected_d = expected_c * 3 + expected_a; + + assert(check_value(i, expected_a, OutputA[i], "A (second execution)")); + assert(check_value(i, expected_b, OutputB[i], "B (second execution)")); + 
assert(check_value(i, expected_c, OutputC[i], "C (second execution)")); + assert(check_value(i, expected_d, OutputD[i], "D (second execution)")); + } + + sycl::free(A, Queue); + sycl::free(B, Queue); + sycl::free(C, Queue); + sycl::free(D, Queue); + + return 0; +} diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index cd85140076c27..81e5b2ba420f4 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -161,38 +161,7 @@ template LifetimeExtender(std::vector) -> LifetimeExtender; /// Convenience wrapper for sycl_device_binary_property_set. class MockPropertySet { public: - MockPropertySet(const std::vector &DeviceLibExts = {}) { - // Most of unit-tests are statically linked with SYCL RT. On Linux and Mac - // systems that causes incorrect RT installation directory detection, which - // prevents proper loading of fallback libraries. See intel/llvm#6945 - // - // Fallback libraries are automatically loaded and linked into device image - // unless there is a special property attached to it or special env variable - // is set which forces RT to skip fallback libraries. - // - // By default, property is set to empty mask here so that unit-tests can be - // launched under any environment. Some unit tests might create dummy - // fallback libaries and require fallback libraries to be loaded, in such - // case input vector will be non-empty. - - std::vector Data(/* four elements */ 4, - /* each element is zero */ 0); - if (!DeviceLibExts.empty()) { - uint32_t DeviceLibReqMask = 0; - for (auto Ext : DeviceLibExts) { - DeviceLibReqMask |= 0x1 - << (static_cast(Ext) - - static_cast( - DeviceLibExt::cl_intel_devicelib_assert)); - } - std::memcpy(Data.data(), &DeviceLibReqMask, sizeof(DeviceLibReqMask)); - } - // Name doesn't matter here, it is not used by RT - // Value must be an all-zero 32-bit mask, which would mean that no fallback - // libraries are needed to be loaded. 
- MockProperty DeviceLibReqMask("", Data, SYCL_PROPERTY_TYPE_UINT32); - insert(__SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK, std::move(DeviceLibReqMask)); - } + MockPropertySet() = default; /// Adds a new property to the set. /// diff --git a/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp index 81ddb0af61454..63f5a3dec1425 100644 --- a/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp +++ b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp @@ -28,52 +28,18 @@ using namespace sycl; class MultipleDevsKernelBundleTestKernel; -class DevLibTestKernel; +class MultipleDevsCacheTestKernel; MOCK_INTEGRATION_HEADER(MultipleDevsKernelBundleTestKernel) -MOCK_INTEGRATION_HEADER(DevLibTestKernel) +MOCK_INTEGRATION_HEADER(MultipleDevsCacheTestKernel) using namespace sycl::unittest; -inline void createDummyDeviceLib(sycl::detail::DeviceLibExt Ext) { - // Create a dummy fallback library correpsonding to the extension (if it - // doesn't exist). - std::string ExtName; - switch (Ext) { - case sycl::detail::DeviceLibExt::cl_intel_devicelib_math: - ExtName = "libsycl-fallback-cmath"; - break; - case sycl::detail::DeviceLibExt::cl_intel_devicelib_assert: - ExtName = "libsycl-fallback-cassert"; - break; - default: - FAIL() << "Unknown device library extension"; - } - - auto DSOPath = sycl::detail::OSUtil::getCurrentDSODir(); - std::string LibPath = DSOPath + detail::OSUtil::DirSep + ExtName + ".spv"; - std::ifstream LibFile(LibPath); - if (LibFile.good()) { - LibFile.close(); - } else { - std::ofstream LibFile(LibPath); - LibFile << "0"; - LibFile.close(); - } -} - -// Function to geneate mock device image which uses device libraries. -inline sycl::unittest::MockDeviceImage generateImage( - std::initializer_list KernelNames, - sycl::detail::ur::DeviceBinaryType BinType, const char *DeviceTargetSpec, - const std::vector &DeviceLibExts = {}) { - // Create dummy device libraries if they don't exist. 
- for (auto Ext : DeviceLibExts) { - createDummyDeviceLib(Ext); - } - - MockPropertySet PropSet(DeviceLibExts); - +inline sycl::unittest::MockDeviceImage +generateImage(std::initializer_list KernelNames, + sycl::detail::ur::DeviceBinaryType BinType, + const char *DeviceTargetSpec) { + MockPropertySet PropSet; std::string Combined; for (auto it = KernelNames.begin(); it != KernelNames.end(); ++it) { if (it != KernelNames.begin()) @@ -99,15 +65,12 @@ inline sycl::unittest::MockDeviceImage generateImage( static sycl::unittest::MockDeviceImage Imgs[3] = { sycl::unittest::generateDefaultImage( {"MultipleDevsKernelBundleTestKernel"}), - generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64, - {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, - sycl::detail::DeviceLibExt::cl_intel_devicelib_assert}), - generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64, - {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, - sycl::detail::DeviceLibExt::cl_intel_devicelib_assert})}; - + generateImage({"MultipleDevsCacheTestKernel"}, + SYCL_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64), + generateImage({"MultipleDevsCacheTestKernel"}, + SYCL_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64)}; static sycl::unittest::MockDeviceImageArray<3> ImgArray{Imgs}; struct MockDeviceData { @@ -325,13 +288,8 @@ TEST_P(MultipleDevsKernelBundleTest, BuildTwiceWithOverlappingDevices) { } // Test to check several use cases for multi-device kernel bundles. -// Test covers AOT and JIT cases. We mock usage of fallback device libaries to -// excersise additional logic in the program manager. Checks are used to test -// that program and device libraries caching works as expected. -TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { - // Unset the SYCL_DEVICELIB_NO_FALLBACK so that fallback libraries are used. 
- ScopedEnvVar var("SYCL_DEVICELIB_NO_FALLBACK", nullptr, - SYCLConfig::reset); +// Test covers AOT and JIT cases. +TEST_P(MultipleDevsKernelBundleTest, MultipleDevsCache) { std::vector Devices = Plt.get_devices(GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE ? sycl::info::device_type::cpu @@ -360,48 +318,43 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { // Get bundle in executable state for multiple devices in a context, enqueue // a kernel to each device. - sycl::kernel_id KernelID = sycl::get_kernel_id(); + sycl::kernel_id KernelID = + sycl::get_kernel_id(); sycl::kernel_bundle KernelBundleExecutable = sycl::get_kernel_bundle( Context, {Dev1, Dev2, Dev3}, {KernelID}); for (int i = 0; i < 2; i++) { Queues[i].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutable); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[i].wait(); } if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { - // Verify the number of urProgramCreateWithIL calls: we expect 2 calls for - // fallback libraries (assert + math) and 1 call for the main program. - EXPECT_EQ(ProgramCreateWithILCounter, 3) - << "Expect 3 urProgramCreateWithIL calls"; - - // Verify the number of urProgramBuildExp calls: none expected as we - // compile and link in this case. - EXPECT_EQ(ProgramBuildExpCounter, 0) - << "Expect 0 urProgramBuildExp calls"; - - // Verify the number of urProgramCompileExp calls: we expect 2 calls to - // compile fallback libraries and 1 call to compile the main program. - EXPECT_EQ(ProgramCompileExpCounter, 3) - << "Expect 3 urProgramCompileExp calls"; - - // Verify the number of urProgramLinkExp calls: we expect 1 call which - // links the main program and fallback libraries. 
- EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + // Verify the number of urProgramCreateWithIL calls: we expect 1 call + // for main program + EXPECT_EQ(ProgramCreateWithILCounter, 1) + << "Expect 1 urProgramCreateWithIL calls"; + + // Verify the number of urProgramBuildExp calls: we expect 1 for main + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 1 urProgramBuildExp calls"; + + // Verify the number of urProgramLinkExp calls: none expected. + EXPECT_EQ(ProgramLinkExpCounter, 0) << "Expect 0 urProgramLinkExp calls"; } + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { // In case of AOT compilation, we expect 1 call to // urProgramCreateWithBinary. EXPECT_EQ(ProgramCreateWithBinaryCounter, 1) - << "Expect 3 urProgramCreateWithIL calls"; + << "Expect 1 urProgramCreateWithBinary calls"; // And a single call to urProgramBuildExp. In this case libraries are // linked beforehand, so we don't compile/link them online. EXPECT_EQ(ProgramBuildExpCounter, 1) - << "Expect 0 urProgramBuildExp calls"; + << "Expect 1 urProgramBuildExp calls"; } } @@ -409,8 +362,7 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { // Test case 2 // Get bundles in executable state: for pairs of devices excluding dev4 and - // for the new set of devices which includes the dev4. This checks caching - // of the programs and device libraries. + // for the new set of devices which includes the dev4. 
// Reset counters ProgramCreateWithILCounter = 0; @@ -418,7 +370,8 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { ProgramLinkExpCounter = 0; ProgramCompileExpCounter = 0; ProgramCreateWithBinaryCounter = 0; - sycl::kernel_id KernelID = sycl::get_kernel_id(); + sycl::kernel_id KernelID = + sycl::get_kernel_id(); // Program associated with {dev1, dev2, dev3} is supposed to be cached from // the first test case, we don't expect any additional program creation and // compilation calls for the following bundles because they are all created @@ -437,24 +390,24 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { sycl::get_kernel_bundle(Context, {Dev3}, {KernelID}); EXPECT_EQ(ProgramCreateWithILCounter, 0); + EXPECT_EQ(ProgramBuildExpCounter, 0); EXPECT_EQ(ProgramCompileExpCounter, 0); EXPECT_EQ(ProgramLinkExpCounter, 0); // Next we create a bundle with a different set of devices which includes // dev4, so we expect new UR program creation. Also main program will be - // compiled for new set of devices. Each of device libraries (assert and - // math) will be additionally compiled for dev4, but no program creation is - // expected for device libraries as program handle already exists in the - // per-context cache. + // built for new set of devices. 
sycl::kernel_bundle KernelBundleExecutableNewSet = sycl::get_kernel_bundle( Context, {Dev2, Dev3, Dev4}, {KernelID}); if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { EXPECT_EQ(ProgramCreateWithILCounter, 1) << "Expect 1 urProgramCreateWithIL calls"; - EXPECT_EQ(ProgramCompileExpCounter, 3) - << "Expect 3 urProgramCompileExp calls"; - EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 1 urProgramBuildExp calls"; + EXPECT_EQ(ProgramCompileExpCounter, 0) + << "Expect 0 urProgramCompileExp calls"; + EXPECT_EQ(ProgramLinkExpCounter, 0) << "Expect 0 urProgramLinkExp calls"; } if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { @@ -467,20 +420,17 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { for (int i = 0; i < 3; i++) { Queues[0].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutableSubset1); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[0].wait(); Queues[2].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutableNewSet); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[2].wait(); } } - - // Reset the SYCL_DEVICELIB_NO_FALLBACK to its original value. - sycl::detail::SYCLConfig::reset(); } // The following helpers and test verify persistent cache usage when we have