From 14c960fcd0623f37fa93e10c627e866ef3839484 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 22:29:18 +0000 Subject: [PATCH 1/8] Bump urllib3 from 2.5.0 to 2.6.0 in /llvm/utils/git (#20850) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.0.
Release notes

Sourced from urllib3's releases.

2.6.0

🚀 urllib3 is fundraising for HTTP/2 support

urllib3 is raising ~$40,000 USD to release HTTP/2 support and ensure long-term sustainable maintenance of the project after a sharp decline in financial support. If your company or organization uses Python and would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and thousands of other projects, please consider contributing financially to ensure HTTP/2 support is developed sustainably and maintained for the long haul.

Thank you for your support.

Security

[!IMPORTANT]

  • If urllib3 is not installed with the optional urllib3[brotli] extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using urllib3[brotli] to install a compatible Brotli package automatically.
  • If you use custom decompressors, please make sure to update them to respect the changed API of urllib3.response.ContentDecoder.

Features

Removals

Bugfixes

Misc

Changelog

Sourced from urllib3's changelog.

2.6.0 (2025-12-05)

Security

.. caution::

Features

Removals

Bugfixes

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=urllib3&package-manager=pip&previous-version=2.5.0&new-version=2.6.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/intel/llvm/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llvm/utils/git/requirements.txt | 6 +++--- llvm/utils/git/requirements_formatting.txt | 6 +++--- llvm/utils/git/requirements_linting.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/utils/git/requirements.txt b/llvm/utils/git/requirements.txt index 7340d7fccbd4a..3834b1fefbd22 100644 --- a/llvm/utils/git/requirements.txt +++ b/llvm/utils/git/requirements.txt @@ -258,9 +258,9 @@ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # via pygithub -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + --hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via # pygithub # requests diff --git a/llvm/utils/git/requirements_formatting.txt b/llvm/utils/git/requirements_formatting.txt index 2bc39effc7642..85a8bfd53ad15 100644 --- a/llvm/utils/git/requirements_formatting.txt +++ b/llvm/utils/git/requirements_formatting.txt @@ -298,9 +298,9 @@ toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f # via darker -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + --hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via requests wrapt==1.16.0 \ 
--hash=sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc \ diff --git a/llvm/utils/git/requirements_linting.txt b/llvm/utils/git/requirements_linting.txt index b985b80aa869e..62dc0d95a7e32 100644 --- a/llvm/utils/git/requirements_linting.txt +++ b/llvm/utils/git/requirements_linting.txt @@ -235,9 +235,9 @@ requests==2.32.5 \ --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \ --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf # via pygithub -urllib3==2.5.0 \ - --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \ - --hash=sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc +urllib3==2.6.0 \ + --hash=sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f \ + --hash=sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1 # via requests wrapt==1.17.3 \ --hash=sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56 \ From 726f8d2130e85675aa7f711e5e71890426fedbff Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 9 Dec 2025 08:43:18 +0800 Subject: [PATCH 2/8] [SYCL] Remove devicelib jit link (#20777) The '-fsycl-device-lib-jit-link' option has been removed in https://github.com/intel/llvm/pull/20326. This PR cleans up the sycl-post-link and SYCL runtime code that supported this option. The removed code includes: 1. Analysis in sycl-post-link tool to record which device libraries are used by user's device image via image property 2. Program manager handling for device library require mask property to decide which fallback spv will be loaded and linked during execution time. 3. Program manager utils to read/load device library fallback spv files into memory and utils to link in-memory spv device library modules with user's module. 
--------- Signed-off-by: jinge90 --- .../test/Driver/sycl-linker-wrapper-image.cpp | 15 +- .../llvm/SYCLLowerIR/SYCLDeviceLibBF16.h | 16 + .../llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h | 47 -- .../SYCLPostLink/ComputeModuleRuntimeInfo.h | 1 - llvm/include/llvm/Support/PropertySetIO.h | 3 - llvm/lib/SYCLLowerIR/CMakeLists.txt | 2 +- llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp | 61 ++ llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp | 795 ------------------ .../SYCLPostLink/ComputeModuleRuntimeInfo.cpp | 6 - llvm/lib/SYCLPostLink/ModuleSplitter.cpp | 2 +- llvm/lib/Support/PropertySetIO.cpp | 1 - .../device-requirements/mask.ll | 20 - llvm/tools/sycl-post-link/sycl-post-link.cpp | 1 + .../lib/rtc/DeviceCompilation.cpp | 1 + sycl/doc/EnvironmentVariables.md | 1 - sycl/doc/design/PropertySets.md | 14 - sycl/source/detail/compiler.hpp | 3 - sycl/source/detail/context_impl.cpp | 1 - sycl/source/detail/context_impl.hpp | 23 - sycl/source/detail/device_binary_image.cpp | 11 +- sycl/source/detail/device_binary_image.hpp | 2 - .../program_manager/program_manager.cpp | 310 +------ .../program_manager/program_manager.hpp | 19 +- sycl/unittests/helpers/MockDeviceImage.hpp | 33 +- .../MultipleDevsKernelBundle.cpp | 138 +-- 25 files changed, 143 insertions(+), 1383 deletions(-) create mode 100644 llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h delete mode 100644 llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h create mode 100644 llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp delete mode 100644 llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp delete mode 100644 llvm/test/tools/sycl-post-link/device-requirements/mask.ll diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index 3af7a619724e6..2dd8dc803b32f 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -44,21 +44,18 @@ int main() { // CHECK-DAG: @.sycl_offloading.target.0 = internal unnamed_addr constant [7 x i8] 
c"spir64\00" // CHECK-DAG: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer // CHECK-DAG: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer -// CHECK-DAG: @prop = internal unnamed_addr constant [17 x i8] c"DeviceLibReqMask\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr null, i32 1, i64 0 }] -// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [24 x i8] c"SYCL/devicelib req mask\00" -// CHECK-DAG: @prop.1 = internal unnamed_addr constant [8 x i8] c"aspects\00" +// CHECK-DAG: @prop = internal unnamed_addr constant [8 x i8] c"aspects\00" // CHECK-DAG: @prop_val = internal unnamed_addr constant [8 x i8] zeroinitializer -// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop.1, ptr @prop_val, i32 2, i64 8 }] -// CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" -// CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] +// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] 
[%_pi_device_binary_property_struct { ptr @prop, ptr @prop_val, i32 2, i64 8 }] +// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" +// CHECK-DAG: @SYCL_PropSetName.1 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" +// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [2 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.1, ptr null, ptr null }] // CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" // CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @__sycl_offload_entry_name, i64 0, i64 0, ptr null }] // CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata" -// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x 
%_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] +// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([2 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 2) }] // CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null } // CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }] // CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }] diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h new file mode 100644 index 0000000000000..ac37441b4226a --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibBF16.h @@ -0,0 +1,16 @@ +//===----- SYCLDeviceLibBF16.h - get SYCL devicelib required Info -----=-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +namespace llvm { +class Function; +class Module; +bool isSYCLDeviceLibBF16Used(const Module &M); +bool isBF16DeviceLibFuncDecl(const Function &F); +} // namespace llvm diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h deleted file mode 100644 index 1336238133984..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h +++ /dev/null @@ -1,47 +0,0 @@ -//===----- SYCLDeviceLibReqMask.h - get SYCL devicelib required Info -----=-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This function goes through input module's function list to detect all SYCL -// devicelib functions invoked. Each devicelib function invoked is included in -// one 'fallback' SPIR-V library loaded by SYCL runtime. After scanning all -// functions in input module, a mask telling which SPIR-V libraries are needed -// by input module indeed will be returned. This mask will be saved and used by -// SYCL runtime later. -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -namespace llvm { - -class Function; -class Module; - -// DeviceLibExt is shared between sycl-post-link tool and sycl runtime. -// If any change is made here, need to sync with DeviceLibExt definition -// in sycl/source/detail/program_manager/program_manager.hpp -// TODO: clear all these DeviceLibExt defs when begin to remove sycl -// devicelib online link path. 
-enum class DeviceLibExt : std::uint32_t { - cl_intel_devicelib_assert, - cl_intel_devicelib_math, - cl_intel_devicelib_math_fp64, - cl_intel_devicelib_complex, - cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring, - cl_intel_devicelib_imf, - cl_intel_devicelib_imf_fp64, - cl_intel_devicelib_imf_bf16, - cl_intel_devicelib_bfloat16, -}; - -uint32_t getSYCLDeviceLibReqMask(const Module &M); -bool isSYCLDeviceLibBF16Used(const Module &M); -bool isBF16DeviceLibFuncDecl(const Function &F); -} // namespace llvm diff --git a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h index 43070dab50fbe..df95979c4d7ee 100644 --- a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h @@ -11,7 +11,6 @@ #pragma once #include "llvm/ADT/SetVector.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/Support/PropertySetIO.h" #include namespace llvm { diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 8338b894bd109..ac42af1f64a57 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -209,9 +209,6 @@ class PropertySetRegistry { "SYCL/specialization constants"; static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; - // TODO: remove SYCL_DEVICELIB_REQ_MASK when devicelib online linking path - // is totally removed. 
- static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; static constexpr char SYCL_DEVICELIB_METADATA[] = "SYCL/devicelib metadata"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 636ce7efb17d0..4320eedd69bda 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -48,7 +48,7 @@ add_llvm_component_library(LLVMSYCLLowerIR SYCLAddOptLevelAttribute.cpp SYCLConditionalCallOnDevice.cpp SYCLCreateNVVMAnnotations.cpp - SYCLDeviceLibReqMask.cpp + SYCLDeviceLibBF16.cpp SYCLDeviceRequirements.cpp SYCLKernelParamOptInfo.cpp SYCLJointMatrixTransform.cpp diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp new file mode 100644 index 0000000000000..4fa06ab0c4cc5 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibBF16.cpp @@ -0,0 +1,61 @@ +//==----- SYCLDeviceLibBF16.cpp - get SYCL BF16 devicelib required Info ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file provides some utils to analyze whether user's device image does +// depend on sycl bfloat16 device library functions. 
+//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/TargetParser/Triple.h" + +static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_"; + +using namespace llvm; + +static llvm::SmallVector BF16DeviceLibFuncs = { + "__devicelib_ConvertFToBF16INTEL", + "__devicelib_ConvertBF16ToFINTEL", + "__devicelib_ConvertFToBF16INTELVec1", + "__devicelib_ConvertBF16ToFINTELVec1", + "__devicelib_ConvertFToBF16INTELVec2", + "__devicelib_ConvertBF16ToFINTELVec2", + "__devicelib_ConvertFToBF16INTELVec3", + "__devicelib_ConvertBF16ToFINTELVec3", + "__devicelib_ConvertFToBF16INTELVec4", + "__devicelib_ConvertBF16ToFINTELVec4", + "__devicelib_ConvertFToBF16INTELVec8", + "__devicelib_ConvertBF16ToFINTELVec8", + "__devicelib_ConvertFToBF16INTELVec16", + "__devicelib_ConvertBF16ToFINTELVec16", +}; + +bool llvm::isSYCLDeviceLibBF16Used(const Module &M) { + if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) + return false; + + for (auto Fn : BF16DeviceLibFuncs) { + Function *BF16Func = M.getFunction(Fn); + if (BF16Func && BF16Func->isDeclaration()) + return true; + } + + return false; +} + +bool llvm::isBF16DeviceLibFuncDecl(const Function &F) { + if (!F.isDeclaration() || !F.getName().starts_with(DEVICELIB_FUNC_PREFIX)) + return false; + for (auto BFunc : BF16DeviceLibFuncs) { + if (!F.getName().compare(BFunc)) + return true; + } + + return false; +} diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp deleted file mode 100644 index c69364445c361..0000000000000 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ /dev/null @@ -1,795 +0,0 @@ -//==----- SYCLDeviceLibReqMask.cpp - get SYCL devicelib required Info ------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This function goes through input module's function list to detect all SYCL -// devicelib functions invoked. Each devicelib function invoked is included in -// one 'fallback' SPIR-V library loaded by SYCL runtime. After scanning all -// functions in input module, a mask telling which SPIR-V libraries are needed -// by input module indeed will be returned. This mask will be saved and used by -// SYCL runtime later. -//===----------------------------------------------------------------------===// - -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/TargetParser/Triple.h" - -#include -#include - -static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_"; - -using namespace llvm; -// We will gradually remove devicelib spv online linking path but keep -// bfloat16 devicelib spv as an exception for a short-term solution. -// For bfloat16 devicelib spv link, we won't rely on ReqMask but to embed -// the bits into executable if necessary -namespace { - -using SYCLDeviceLibFuncMap = std::unordered_map; - -// Please update SDLMap if any item is added to or removed from -// fallback device libraries in libdevice. 
-SYCLDeviceLibFuncMap SDLMap = { - {"__devicelib_abs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acosf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acoshf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_asinf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_asinhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atan2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atanf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_atanhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_cbrtf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_cosf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_coshf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_div", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_erfcf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_erff", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_exp2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_expf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_expm1f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fdimf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fmaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_fmodf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_frexpf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_hypotf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ilogbf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_labs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ldiv", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_ldexpf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_lgammaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_llabs", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_lldiv", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log10f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log1pf", 
DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_log2f", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_logbf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_logf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_modff", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_nextafterf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_powf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_remainderf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_remquof", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_scalbnf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sinf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sinhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_sqrtf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tanf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tanhf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_tgammaf", DeviceLibExt::cl_intel_devicelib_math}, - {"__devicelib_acos", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_acosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_asin", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_asinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atan", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atan2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_atanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cbrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cos", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_cosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_erf", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_erfc", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_exp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_exp2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_expm1", 
DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fdim", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_fmod", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_frexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_hypot", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_ilogb", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_ldexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_lgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log10", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log1p", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_log2", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_logb", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_modf", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_nextafter", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_pow", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_remainder", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_remquo", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_scalbn", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sin", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_sqrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tan", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib_tgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, - {"__devicelib___divsc3", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib___mulsc3", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cabsf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cacosf", 
DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cacoshf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cargf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_casinf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_casinhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_catanf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_catanhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ccosf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ccoshf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cexpf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cimagf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_clogf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cpolarf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cpowf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_cprojf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_crealf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csinf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csinhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_csqrtf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ctanf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib_ctanhf", DeviceLibExt::cl_intel_devicelib_complex}, - {"__devicelib___divdc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib___muldc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cabs", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cacos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cacosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_carg", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_casin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_casinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - 
{"__devicelib_catan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_catanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ccos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ccosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cexp", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cimag", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_clog", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cpolar", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cpow", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_cproj", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_creal", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_csqrt", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ctan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_ctanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, - {"__devicelib_memcpy", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_memset", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_memcmp", DeviceLibExt::cl_intel_devicelib_cstring}, - {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, - {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, - {"__devicelib_imf_llmax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_llmin", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_max", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_min", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ullmax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ullmin", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umax", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umin", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_brev", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_brevll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_byte_perm", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ffs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ffsll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_clz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_clzll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_popc", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_urhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_hadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rhadd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mul24", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umul24", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mulhi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umulhi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_mul64hi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_umul64hi", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ceilf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_abs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_llabs", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fabsf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_truncf", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_rintf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_nearbyintf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_invf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rsqrtf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_exp10f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_expf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_logf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_log2f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_log10f", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_powf", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fast_fdividef", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fadd_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fsub_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmul_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rn", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fdiv_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaf_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float_as_int", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_int2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int_as_float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float_as_uint", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint_as_float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_float2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2float", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_half2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2short_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half2ushort_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half_as_short", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_half_as_ushort", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_uint2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_ull2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ull2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ushort_as_half", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_int2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ll2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rd", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rn", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_ru", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short2half_rz", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_short_as_half", DeviceLibExt::cl_intel_devicelib_imf}, - - {"__devicelib_imf_fmaf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_floorf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_ceilf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fabsf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_truncf16", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_rintf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_nearbyintf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_invf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_sqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_rsqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fmaxf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_fminf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_copysignf16", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vneg2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vneg4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vnegss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vnegss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vabsdiffu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vadd2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vadd4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddss2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddus2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vaddus4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsub2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsub4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubss2", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubss4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubus2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsubus4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vavgu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vhaddu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vhaddu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpeq2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpeq4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpne2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpne4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpges2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpges4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgeu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgeu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgtu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpgtu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmples2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmples4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpleu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpleu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmplts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmplts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vcmpltu2", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_vcmpltu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxs2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxs4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmaxu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmins2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vmins4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vminu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vminu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vseteq2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vseteq4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetne2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetne4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetges2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetges4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgeu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgeu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgtu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetgtu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetles2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetles4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetleu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetleu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetlts2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetlts4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetltu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsetltu4", 
DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsads2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsads4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsadu2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vsadu4", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s16x2_relu", - DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmax_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s16x2_relu", - DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_viaddmin_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmax_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vibmin_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - 
{"__devicelib_imf_vimin3_s16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax3_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_u16x2", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin3_u32", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimax_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin_s16x2_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_vimin_s32_relu", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_double2half", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rcp64h", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_rsqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, 
- {"__devicelib_imf_fmax", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fmin", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_copysign", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dadd_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dsub_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_dmul_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ddiv_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2float_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2int_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - 
{"__devicelib_imf_double2int_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2uint_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2hiint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2loint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ll_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_ru", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double2ull_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_double_as_longlong", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_hiloint2double", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_int2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ll2double_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rd", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_ru", - 
DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_ull2double_rz", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_uint2double_rn", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_longlong_as_double", - DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_fma_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_sqrt_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, - {"__devicelib_imf_bfloat162float", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162int_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162short_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ll_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - 
{"__devicelib_imf_bfloat162uint_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162uint_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ushort_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat162ull_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_float2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat16_as_short", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_bfloat16_as_ushort", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short_as_bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort_as_bfloat16", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - 
{"__devicelib_imf_ushort2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ushort2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_uint2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ull2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_short2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_int2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rd", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rn", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_ru", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ll2bfloat16_rz", - DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fmabf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fmaxbf16", 
DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fminbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_copysignbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_sqrtbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_rsqrtbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_fabsbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_rintbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_floorbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_ceilbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, - {"__devicelib_imf_truncbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, -}; - -// Each fallback device library corresponds to one bit in "require mask" which -// is an unsigned int32. getDeviceLibBit checks which fallback device library -// is required for FuncName and returns the corresponding bit. The corresponding -// mask for each fallback device library is: -// cl_intel_devicelib_assert: 0x1 -// cl_intel_devicelib_math: 0x2 -// cl_intel_devicelib_math_fp64: 0x4 -// cl_intel_devicelib_complex: 0x8 -// cl_intel_devicelib_complex_fp64: 0x10 -// cl_intel_devicelib_cstring : 0x20 -// cl_intel_devicelib_imf: 0x40 -// cl_intel_devicelib_imf_fp64: 0x80 -// cl_intel_devicelib_imf_bf16: 0x100 -// cl_intel_devicelib_bfloat16: 0x200 -uint32_t getDeviceLibBits(const std::string &FuncName) { - auto DeviceLibFuncIter = SDLMap.find(FuncName); - return ((DeviceLibFuncIter == SDLMap.end()) - ? 0 - : 0x1 << (static_cast(DeviceLibFuncIter->second) - - static_cast( - DeviceLibExt::cl_intel_devicelib_assert))); -} - -} // namespace - -// For each device image module, we go through all functions which meets -// 1. The function name has prefix "__devicelib_" -// 2. The function is declaration which means it doesn't have function body -// And we don't expect non-spirv functions with "__devicelib_" prefix. 
-uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { - // Device libraries will be enabled only for spir-v module. - if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return 0; - uint32_t ReqMask = 0; - for (const Function &SF : M) { - if (SF.getName().starts_with(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { - assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); - uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); - ReqMask |= DeviceLibBits; - } - } - return ReqMask; -} - -static llvm::SmallVector BF16DeviceLibFuncs = { - "__devicelib_ConvertFToBF16INTEL", - "__devicelib_ConvertBF16ToFINTEL", - "__devicelib_ConvertFToBF16INTELVec1", - "__devicelib_ConvertBF16ToFINTELVec1", - "__devicelib_ConvertFToBF16INTELVec2", - "__devicelib_ConvertBF16ToFINTELVec2", - "__devicelib_ConvertFToBF16INTELVec3", - "__devicelib_ConvertBF16ToFINTELVec3", - "__devicelib_ConvertFToBF16INTELVec4", - "__devicelib_ConvertBF16ToFINTELVec4", - "__devicelib_ConvertFToBF16INTELVec8", - "__devicelib_ConvertBF16ToFINTELVec8", - "__devicelib_ConvertFToBF16INTELVec16", - "__devicelib_ConvertBF16ToFINTELVec16", -}; - -bool llvm::isSYCLDeviceLibBF16Used(const Module &M) { - if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return false; - - for (auto Fn : BF16DeviceLibFuncs) { - Function *BF16Func = M.getFunction(Fn); - if (BF16Func && BF16Func->isDeclaration()) - return true; - } - - return false; -} - -bool llvm::isBF16DeviceLibFuncDecl(const Function &F) { - if (!F.isDeclaration() || !F.getName().starts_with(DEVICELIB_FUNC_PREFIX)) - return false; - for (auto BFunc : BF16DeviceLibFuncs) { - if (!F.getName().compare(BFunc)) - return true; - } - - return false; -} diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp index 07afffbecf552..f3172adc7769e 100644 --- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp @@ -16,7 +16,6 @@ #include 
"llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" @@ -123,11 +122,6 @@ PropSetRegTy computeModuleProperties(const Module &M, bool AllowDeviceImageDependencies) { PropSetRegTy PropSet; - { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); - } { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); diff --git a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp index 33b4dfb26d740..4c0bf9523d2cb 100644 --- a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp +++ b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp @@ -27,7 +27,7 @@ #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h" #include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SanitizerPostOptimizer.h" diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index b562c67ff1eb3..2d18edb2ef921 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -195,7 +195,6 @@ PropertyValue &PropertyValue::operator=(const PropertyValue &P) { } constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; -constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff 
--git a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll b/llvm/test/tools/sycl-post-link/device-requirements/mask.ll deleted file mode 100644 index 31b393249bf5f..0000000000000 --- a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.prop - -; CHECK:[SYCL/devicelib req mask] -; CHECK: DeviceLibReqMask=1|64 - -source_filename = "main.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spirv64-unknown-unknown" - -declare spir_func i32 @__devicelib_imf_umulhi(i32 noundef %0, i32 noundef %1) - -; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @kernel() #0 { -entry: - %0 = call i32 @__devicelib_imf_umulhi(i32 0, i32 0) - ret void -} - -attributes #0 = { "sycl-module-id"="main.cpp" } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 1d7d6686e1ad7..8f21122f9e996 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -33,6 +33,7 @@ #include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibBF16.h" #include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index e8e5ab4e93bf2..30f39f5c5bc5d 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/doc/EnvironmentVariables.md 
b/sycl/doc/EnvironmentVariables.md index 0b02c3e25a4f7..9b0d925850f96 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -203,7 +203,6 @@ variables in production code. | `SYCL_PRINT_EXECUTION_GRAPH` | Described [below](#sycl_print_execution_graph-options) | Print execution graph to DOT text file. | | `SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP` | Any(\*) | Disable regular cleanup of enqueued (or finished, in case of host tasks) non-leaf command nodes. If disabled, command nodes will be cleaned up only during the destruction of the last remaining memory object used by them. | | `SYCL_DISABLE_POST_ENQUEUE_CLEANUP` (deprecated) | Any(\*) | Use `SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP` instead. | -| `SYCL_DEVICELIB_INHIBIT_NATIVE` | String of device library extensions (separated by a whitespace) | Do not rely on device native support for devicelib extensions listed in this option. | | `SYCL_PROGRAM_COMPILE_OPTIONS` | String of valid compile options | Override compile options for all programs. | | `SYCL_PROGRAM_LINK_OPTIONS` | String of valid link options | Override link options for all programs. | | `SYCL_PROGRAM_APPEND_COMPILE_OPTIONS` | String of valid compile options | Append to the end of compile options for all programs. | diff --git a/sycl/doc/design/PropertySets.md b/sycl/doc/design/PropertySets.md index 4cfdb552d6622..9c51b7ea86915 100644 --- a/sycl/doc/design/PropertySets.md +++ b/sycl/doc/design/PropertySets.md @@ -72,20 +72,6 @@ constant. See also [SYCL2020-SpecializationConstants.md](./SYCL2020-SpecializationConstants.md). - -### [SYCL/devicelib req mask] - -__Key:__ At most one entry with "DeviceLibReqMask". - -__Value type:__ 32 bit integer. ("1") - -__Value:__ A bitmask of which device libraries the binary uses. - -__Notes:__ - -1. If this property set is missing, no device libraries are used by the binary. - - ### [SYCL/kernel param opt] __Key:__ Kernel name. 
diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index dd510f70fa4c6..0d833dbe2279a 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -44,9 +44,6 @@ /// PropertySetIO.h #define __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP \ "SYCL/specialization constants default values" -/// TODO: remove req mask when sycl devicelib online linking path is removed. -/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h -#define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" /// PropertySetRegistry::SYCL_DEVICELIB_METADATA defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_DEVICELIB_METADATA "SYCL/devicelib metadata" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 053597fbc857e..7d032b4ef2e47 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -133,7 +133,6 @@ context_impl::~context_impl() { detail::ProgramManager::getInstance() .getProfileCounterDeviceGlobalEntries(this)) DGEntry->cleanupProfileCounter(this); - MCachedLibPrograms.clear(); // TODO catch an exception and put it to list of asynchronous exceptions getAdapter().call_nocheck(MContext); } catch (std::exception &e) { diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 3b42a6319d223..34e9e394d4649 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -130,27 +130,6 @@ class context_impl : public std::enable_shared_from_this { devices_range getDevices() const { return MDevices; } - using CachedLibProgramsT = - std::map, - Managed>; - - /// In contrast to user programs, which are compiled from user code, library - /// programs come from the SYCL runtime. 
They are identified by the - /// corresponding extension: - /// - /// cl_intel_devicelib_assert -> # - /// cl_intel_devicelib_complex -> # - /// etc. - /// - /// See `doc/design/DeviceLibExtensions.rst' for - /// more details. - /// - /// \returns an instance of sycl::detail::Locked which wraps a map with device - /// library programs and the corresponding lock for synchronized access. - Locked acquireCachedLibPrograms() { - return {MCachedLibPrograms, MCachedLibProgramsMutex}; - } - KernelProgramCache &getKernelProgramCache() const { return MKernelProgramCache; } @@ -266,8 +245,6 @@ class context_impl : public std::enable_shared_from_this { ur_context_handle_t MContext; platform_impl &MPlatform; property_list MPropList; - CachedLibProgramsT MCachedLibPrograms; - std::mutex MCachedLibProgramsMutex; mutable KernelProgramCache MKernelProgramCache; mutable PropertySupport MSupportBufferLocationByDevices; diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 6e2f0baa4f369..4b3ef86dd1a50 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -188,7 +188,6 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); - DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); DeviceLibMetadata.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); ImplicitLocalArg.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG); @@ -551,10 +550,6 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( &MergedExportedSymbols, &MergedRegisteredKernels}; // Exclusive merges. 
- auto MergedDeviceLibReqMask = - exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { - return Img.getDeviceLibReqMask(); - }); auto MergedProgramMetadata = exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getProgramMetadata(); @@ -571,9 +566,8 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( std::array *, - 4> - MergedMaps{&MergedDeviceLibReqMask, &MergedProgramMetadata, - &MergedImportedSymbols, &MergedMisc}; + 3> + MergedMaps{&MergedProgramMetadata, &MergedImportedSymbols, &MergedMisc}; // When merging exported and imported, the exported symbols may cancel out // some of the imported symbols. @@ -676,7 +670,6 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); - CopyPropertiesMap(MergedDeviceLibReqMask, DeviceLibReqMask); CopyPropertiesMap(MergedProgramMetadata, ProgramMetadata); CopyPropertiesMap(MergedImportedSymbols, ImportedSymbols); CopyPropertiesMap(MergedMisc, Misc); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 2cd380c91bd65..b4818017268ea 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -216,7 +216,6 @@ class RTDeviceBinaryImage { const PropertyRange &getSpecConstantsDefaultValues() const { return SpecConstDefaultValuesMap; } - const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } const PropertyRange &getDeviceLibMetadata() const { return DeviceLibMetadata; } @@ -255,7 +254,6 @@ class RTDeviceBinaryImage { ur::DeviceBinaryType Format = SYCL_DEVICE_BINARY_TYPE_NONE; RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; - RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; RTDeviceBinaryImage::PropertyRange DeviceLibMetadata; RTDeviceBinaryImage::PropertyRange 
KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8abbe0fdc261f..ba59d2cbb7a5a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -914,19 +914,6 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, NativePrg, Adapter); } - // Link a fallback implementation of device libraries if they are not - // supported by a device compiler. - // Pre-compiled programs (after AOT compilation or read from persitent - // cache) are supposed to be already linked. - // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means - // no fallback device library will be linked. - uint32_t DeviceLibReqMask = 0; - bool UseDeviceLibs = !DeviceCodeWasInCache && - MainImg.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && - !SYCLConfig::get(); - if (UseDeviceLibs) - DeviceLibReqMask = getDeviceLibReqMask(MainImg); - std::vector> ProgramsToLink; // If we had a program in cache, then it should have been the fully linked // program already. @@ -936,8 +923,6 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, // Oth image is the main one and has been handled, skip it. 
for (std::size_t I = 1; I < ImgWithDeps.getAll().size(); ++I) { const RTDeviceBinaryImage *BinImg = ImgWithDeps.getAll()[I]; - if (UseDeviceLibs) - DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); Managed NativePrg = createURProgram(*BinImg, ContextImpl, Devs); @@ -957,7 +942,7 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, Managed BuiltProgram = build(std::move(NativePrg), ContextImpl, CompileOpts, LinkOpts, - URDevices, DeviceLibReqMask, ProgramsToLink, + URDevices, ProgramsToLink, /*CreatedFromBinary*/ MainImg.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); @@ -1204,91 +1189,6 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, return Log; } -// TODO device libraries may use scpecialization constants, manifest files, etc. -// To support that they need to be delivered in a different container - so that -// sycl_device_binary_struct can be created for each of them. -static Managed loadDeviceLib(context_impl &Context, - const char *Name) { - std::string LibSyclDir = OSUtil::getCurrentDSODir(); - std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, - std::ifstream::in | std::ifstream::binary); - if (!File.good()) { - return {}; - } - - File.seekg(0, std::ios::end); - size_t FileSize = File.tellg(); - File.seekg(0, std::ios::beg); - std::vector FileContent(FileSize); - File.read(&FileContent[0], FileSize); - File.close(); - - return createSpirvProgram(Context, (unsigned char *)&FileContent[0], - FileSize); -} - -// For each extension, a pair of library names. The first uses native support, -// the second emulates functionality in software. 
-static const std::map> - DeviceLibNames = { - {DeviceLibExt::cl_intel_devicelib_assert, - {nullptr, "libsycl-fallback-cassert.spv"}}, - {DeviceLibExt::cl_intel_devicelib_math, - {nullptr, "libsycl-fallback-cmath.spv"}}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - {nullptr, "libsycl-fallback-cmath-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_complex, - {nullptr, "libsycl-fallback-complex.spv"}}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - {nullptr, "libsycl-fallback-complex-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_cstring, - {nullptr, "libsycl-fallback-cstring.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf, - {nullptr, "libsycl-fallback-imf.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, - {nullptr, "libsycl-fallback-imf-fp64.spv"}}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, - {nullptr, "libsycl-fallback-imf-bf16.spv"}}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}}; - -static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) { - auto LibPair = DeviceLibNames.find(Extension); - const char *Lib = nullptr; - if (LibPair != DeviceLibNames.end()) - Lib = Native ? LibPair->second.first : LibPair->second.second; - if (Lib == nullptr) - throw exception(make_error_code(errc::build), - "Unhandled (new?) device library extension"); - return Lib; -} - -// For each extension understood by the SYCL runtime, the string representation -// of its name. Names with devicelib in them are internal to the runtime. Others -// are actual OpenCL extensions. 
-static const std::map DeviceLibExtensionStrs = { - {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, - {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - "cl_intel_devicelib_math_fp64"}, - {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "cl_intel_devicelib_complex_fp64"}, - {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, - {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - "cl_intel_bfloat16_conversions"}}; - -static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { - auto Ext = DeviceLibExtensionStrs.find(Extension); - if (Ext == DeviceLibExtensionStrs.end()) - throw exception(make_error_code(errc::build), - "Unhandled (new?) device library extension"); - return Ext->second; -} - static ur_result_t doCompile(adapter_impl &Adapter, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, const char *Opts) { @@ -1303,88 +1203,6 @@ static ur_result_t doCompile(adapter_impl &Adapter, ur_program_handle_t Program, return Result; } -static ur_program_handle_t -loadDeviceLibFallback(context_impl &Context, DeviceLibExt Extension, - std::vector &Devices, - bool UseNativeLib) { - - auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); - auto LockedCache = Context.acquireCachedLibPrograms(); - auto &CachedLibPrograms = LockedCache.get(); - // Collect list of devices to compile the library for. Library was already - // compiled for a device if there is a corresponding record in the per-context - // cache. 
- std::vector DevicesToCompile; - Managed *UrProgram = nullptr; - assert(Devices.size() > 0 && - "At least one device is expected in the input vector"); - // Vector of devices that don't have the library cached. - for (ur_device_handle_t Dev : Devices) { - auto [It, Inserted] = CachedLibPrograms.emplace( - std::make_pair(Extension, Dev), Managed{}); - if (!Inserted) { - Managed &CachedUrProgram = It->second; - assert(CachedUrProgram && "If device lib UR program was cached then is " - "expected to be not a nullptr"); - assert(!UrProgram || *UrProgram == CachedUrProgram); - // Managed::operator& is overloaded, use - // `std::addressof`: - UrProgram = std::addressof(CachedUrProgram); - } else { - DevicesToCompile.push_back(Dev); - } - } - - if (DevicesToCompile.empty()) - return *UrProgram; - - auto EraseProgramForDevices = [&]() { - for (auto Dev : DevicesToCompile) - CachedLibPrograms.erase(std::make_pair(Extension, Dev)); - }; - - Managed NewlyCreated; - // Create UR program for device lib if we don't have it yet. - if (!UrProgram) { - NewlyCreated = loadDeviceLib(Context, LibFileName); - if (NewlyCreated == nullptr) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); - } - } - - // Insert UrProgram into the cache for all devices that we will compile for. - for (auto Dev : DevicesToCompile) { - Managed &Cached = - CachedLibPrograms[std::make_pair(Extension, Dev)]; - if (NewlyCreated) { - Cached = std::move(NewlyCreated); - UrProgram = std::addressof(Cached); - } else { - Cached = UrProgram->retain(); - } - } - - adapter_impl &Adapter = Context.getAdapter(); - // TODO no spec constants are used in the std libraries, support in the future - // Do not use compile options for library programs: it is not clear if user - // options (image options) are supposed to be applied to library program as - // well, and what actually happens to a SPIR-V program if we apply them. 
- ur_result_t Error = - doCompile(Adapter, *UrProgram, DevicesToCompile.size(), - DevicesToCompile.data(), Context.getHandleRef(), ""); - if (Error != UR_RESULT_SUCCESS) { - EraseProgramForDevices(); - throw detail::set_ur_error( - exception(make_error_code(errc::build), - ProgramManager::getProgramBuildLog(*UrProgram, Context)), - Error); - } - - return *UrProgram; -} - ProgramManager::ProgramManager() : m_SanitizerFoundInImage(SanitizerType::None) { const char *SpvFile = std::getenv(UseSpvEnv); @@ -1581,95 +1399,6 @@ const RTDeviceBinaryImage &ProgramManager::getDeviceImage( return **ImageIterator; } -static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { - uint32_t Mask = - 0x1 << (static_cast(Ext) - - static_cast(DeviceLibExt::cl_intel_devicelib_assert)); - return ((DeviceLibReqMask & Mask) == Mask); -} - -static std::vector -getDeviceLibPrograms(context_impl &Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { - std::vector Programs; - - std::pair RequiredDeviceLibExt[] = { - {DeviceLibExt::cl_intel_devicelib_assert, - /* is fallback loaded? */ false}, - {DeviceLibExt::cl_intel_devicelib_math, false}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_complex, false}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_cstring, false}, - {DeviceLibExt::cl_intel_devicelib_imf, false}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; - - // Disable all devicelib extensions requiring fp64 support if at least - // one underlying device doesn't support cl_khr_fp64. 
- const bool fp64Support = std::all_of( - Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) { - return Context.getPlatformImpl().getDeviceImpl(Device)->has_extension( - "cl_khr_fp64"); - }); - - // Load a fallback library for an extension if the any device does not - // support it. - for (auto Device : Devices) { - // TODO: device_impl::has_extension should cache extension string, then we'd - // be able to use that in the loop below directly. - std::string DevExtList = urGetInfoString( - *Context.getPlatformImpl().getDeviceImpl(Device), - UR_DEVICE_INFO_EXTENSIONS); - - for (auto &Pair : RequiredDeviceLibExt) { - DeviceLibExt Ext = Pair.first; - bool &FallbackIsLoaded = Pair.second; - - if (FallbackIsLoaded) { - continue; - } - - if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { - continue; - } - - // Skip loading the fallback library that requires fp64 support if any - // device in the list doesn't support fp64. - if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_imf_fp64) && - !fp64Support) { - continue; - } - - auto ExtName = getDeviceLibExtensionStr(Ext); - - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtName) != nullptr; - } - - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName); - if (!DeviceSupports || InhibitNativeImpl) { - Programs.push_back(loadDeviceLibFallback(Context, Ext, Devices, - /*UseNativeLib=*/false)); - FallbackIsLoaded = true; - } else { - // bfloat16 needs native library if device supports it - if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) { - Programs.push_back(loadDeviceLibFallback(Context, Ext, Devices, - /*UseNativeLib=*/true)); - FallbackIsLoaded = true; - } - } - } - } - return Programs; -} - // Check if device image is compressed. 
static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1680,7 +1409,7 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { Managed ProgramManager::build( Managed Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, uint32_t DeviceLibReqMask, + std::vector &Devices, const std::vector> &ExtraProgramsToLink, bool CreatedFromBinary) { @@ -1688,30 +1417,18 @@ Managed ProgramManager::build( std::cerr << ">>> ProgramManager::build(" << static_cast(Program) << ", " << CompileOptions << ", " << LinkOptions << ", " - << VecToString(Devices) << ", " << std::hex << DeviceLibReqMask - << std::dec << ", " << VecToString(ExtraProgramsToLink) << ", " - << CreatedFromBinary << ")\n"; + << VecToString(Devices) << ", " << std::dec << ", " + << VecToString(ExtraProgramsToLink) << ", " << CreatedFromBinary + << ")\n"; } - bool LinkDeviceLibs = (DeviceLibReqMask != 0); - - // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. - // We do not link with other device libraries, because it may fail - // due to unrecognized SPIR-V format of those libraries. - if (CompileOptions.find(std::string("-cmc")) != std::string::npos || - CompileOptions.find(std::string("-vc-codegen")) != std::string::npos) - LinkDeviceLibs = false; - std::vector LinkPrograms; - if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); - } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1'); adapter_impl &Adapter = Context.getAdapter(); - if (LinkPrograms.empty() && ExtraProgramsToLink.empty() && !ForceLink) { + if (ExtraProgramsToLink.empty() && !ForceLink) { const std::string &Options = LinkOptions.empty() ? 
CompileOptions : (CompileOptions + " " + LinkOptions); @@ -2264,15 +1981,6 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, F.close(); } -uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { - const RTDeviceBinaryImage::PropertyRange &DLMRange = - Img.getDeviceLibReqMask(); - if (DLMRange.isAvailable()) - return DeviceBinaryProperty(*(DLMRange.begin())).asUint32(); - else - return 0x0; -} - const KernelArgMask * ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, std::string_view KernelName) { @@ -3277,10 +2985,8 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( // No linking of extra programs reqruired. std::vector> ExtraProgramsToLink; std::vector Devs = {DeviceImpl.getHandleRef()}; - auto BuildProgram = - build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, Devs, - /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, - ExtraProgramsToLink); + auto BuildProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, + LinkOpts, Devs, ExtraProgramsToLink); Managed UrKernel{Adapter}; Adapter.call( BuildProgram, KernelName.data(), &UrKernel); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c7a1a0aafb854..c279fe4934830 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -77,21 +77,6 @@ class devices_range; class queue_impl; class event_impl; class device_images_range; -// DeviceLibExt is shared between sycl runtime and sycl-post-link tool. 
-// If any update is made here, need to sync with DeviceLibExt definition -// in llvm/tools/sycl-post-link/sycl-post-link.cpp -enum class DeviceLibExt : std::uint32_t { - cl_intel_devicelib_assert, - cl_intel_devicelib_math, - cl_intel_devicelib_math_fp64, - cl_intel_devicelib_complex, - cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring, - cl_intel_devicelib_imf, - cl_intel_devicelib_imf_fp64, - cl_intel_devicelib_imf_bf16, - cl_intel_devicelib_bfloat16, -}; enum class SanitizerType { None, @@ -227,8 +212,6 @@ class ProgramManager { static std::string getProgramBuildLog(const ur_program_handle_t &Program, context_impl &Context); - uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); - /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. /// \param NativePrg the UR program associated with the kernel. @@ -430,7 +413,7 @@ class ProgramManager { Managed build(Managed Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, uint32_t DeviceLibReqMask, + std::vector &Devices, const std::vector> &ProgramsToLink, bool CreatedFromBinary = false); diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index cd85140076c27..81e5b2ba420f4 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -161,38 +161,7 @@ template LifetimeExtender(std::vector) -> LifetimeExtender; /// Convenience wrapper for sycl_device_binary_property_set. class MockPropertySet { public: - MockPropertySet(const std::vector &DeviceLibExts = {}) { - // Most of unit-tests are statically linked with SYCL RT. On Linux and Mac - // systems that causes incorrect RT installation directory detection, which - // prevents proper loading of fallback libraries. 
See intel/llvm#6945 - // - // Fallback libraries are automatically loaded and linked into device image - // unless there is a special property attached to it or special env variable - // is set which forces RT to skip fallback libraries. - // - // By default, property is set to empty mask here so that unit-tests can be - // launched under any environment. Some unit tests might create dummy - // fallback libaries and require fallback libraries to be loaded, in such - // case input vector will be non-empty. - - std::vector Data(/* four elements */ 4, - /* each element is zero */ 0); - if (!DeviceLibExts.empty()) { - uint32_t DeviceLibReqMask = 0; - for (auto Ext : DeviceLibExts) { - DeviceLibReqMask |= 0x1 - << (static_cast(Ext) - - static_cast( - DeviceLibExt::cl_intel_devicelib_assert)); - } - std::memcpy(Data.data(), &DeviceLibReqMask, sizeof(DeviceLibReqMask)); - } - // Name doesn't matter here, it is not used by RT - // Value must be an all-zero 32-bit mask, which would mean that no fallback - // libraries are needed to be loaded. - MockProperty DeviceLibReqMask("", Data, SYCL_PROPERTY_TYPE_UINT32); - insert(__SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK, std::move(DeviceLibReqMask)); - } + MockPropertySet() = default; /// Adds a new property to the set. 
/// diff --git a/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp index 81ddb0af61454..63f5a3dec1425 100644 --- a/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp +++ b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp @@ -28,52 +28,18 @@ using namespace sycl; class MultipleDevsKernelBundleTestKernel; -class DevLibTestKernel; +class MultipleDevsCacheTestKernel; MOCK_INTEGRATION_HEADER(MultipleDevsKernelBundleTestKernel) -MOCK_INTEGRATION_HEADER(DevLibTestKernel) +MOCK_INTEGRATION_HEADER(MultipleDevsCacheTestKernel) using namespace sycl::unittest; -inline void createDummyDeviceLib(sycl::detail::DeviceLibExt Ext) { - // Create a dummy fallback library correpsonding to the extension (if it - // doesn't exist). - std::string ExtName; - switch (Ext) { - case sycl::detail::DeviceLibExt::cl_intel_devicelib_math: - ExtName = "libsycl-fallback-cmath"; - break; - case sycl::detail::DeviceLibExt::cl_intel_devicelib_assert: - ExtName = "libsycl-fallback-cassert"; - break; - default: - FAIL() << "Unknown device library extension"; - } - - auto DSOPath = sycl::detail::OSUtil::getCurrentDSODir(); - std::string LibPath = DSOPath + detail::OSUtil::DirSep + ExtName + ".spv"; - std::ifstream LibFile(LibPath); - if (LibFile.good()) { - LibFile.close(); - } else { - std::ofstream LibFile(LibPath); - LibFile << "0"; - LibFile.close(); - } -} - -// Function to geneate mock device image which uses device libraries. -inline sycl::unittest::MockDeviceImage generateImage( - std::initializer_list KernelNames, - sycl::detail::ur::DeviceBinaryType BinType, const char *DeviceTargetSpec, - const std::vector &DeviceLibExts = {}) { - // Create dummy device libraries if they don't exist. 
- for (auto Ext : DeviceLibExts) { - createDummyDeviceLib(Ext); - } - - MockPropertySet PropSet(DeviceLibExts); - +inline sycl::unittest::MockDeviceImage +generateImage(std::initializer_list KernelNames, + sycl::detail::ur::DeviceBinaryType BinType, + const char *DeviceTargetSpec) { + MockPropertySet PropSet; std::string Combined; for (auto it = KernelNames.begin(); it != KernelNames.end(); ++it) { if (it != KernelNames.begin()) @@ -99,15 +65,12 @@ inline sycl::unittest::MockDeviceImage generateImage( static sycl::unittest::MockDeviceImage Imgs[3] = { sycl::unittest::generateDefaultImage( {"MultipleDevsKernelBundleTestKernel"}), - generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64, - {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, - sycl::detail::DeviceLibExt::cl_intel_devicelib_assert}), - generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64, - {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, - sycl::detail::DeviceLibExt::cl_intel_devicelib_assert})}; - + generateImage({"MultipleDevsCacheTestKernel"}, + SYCL_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64), + generateImage({"MultipleDevsCacheTestKernel"}, + SYCL_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64)}; static sycl::unittest::MockDeviceImageArray<3> ImgArray{Imgs}; struct MockDeviceData { @@ -325,13 +288,8 @@ TEST_P(MultipleDevsKernelBundleTest, BuildTwiceWithOverlappingDevices) { } // Test to check several use cases for multi-device kernel bundles. -// Test covers AOT and JIT cases. We mock usage of fallback device libaries to -// excersise additional logic in the program manager. Checks are used to test -// that program and device libraries caching works as expected. -TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { - // Unset the SYCL_DEVICELIB_NO_FALLBACK so that fallback libraries are used. 
- ScopedEnvVar var("SYCL_DEVICELIB_NO_FALLBACK", nullptr, - SYCLConfig::reset); +// Test covers AOT and JIT cases. +TEST_P(MultipleDevsKernelBundleTest, MultipleDevsCache) { std::vector Devices = Plt.get_devices(GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE ? sycl::info::device_type::cpu @@ -360,48 +318,43 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { // Get bundle in executable state for multiple devices in a context, enqueue // a kernel to each device. - sycl::kernel_id KernelID = sycl::get_kernel_id(); + sycl::kernel_id KernelID = + sycl::get_kernel_id(); sycl::kernel_bundle KernelBundleExecutable = sycl::get_kernel_bundle( Context, {Dev1, Dev2, Dev3}, {KernelID}); for (int i = 0; i < 2; i++) { Queues[i].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutable); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[i].wait(); } if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { - // Verify the number of urProgramCreateWithIL calls: we expect 2 calls for - // fallback libraries (assert + math) and 1 call for the main program. - EXPECT_EQ(ProgramCreateWithILCounter, 3) - << "Expect 3 urProgramCreateWithIL calls"; - - // Verify the number of urProgramBuildExp calls: none expected as we - // compile and link in this case. - EXPECT_EQ(ProgramBuildExpCounter, 0) - << "Expect 0 urProgramBuildExp calls"; - - // Verify the number of urProgramCompileExp calls: we expect 2 calls to - // compile fallback libraries and 1 call to compile the main program. - EXPECT_EQ(ProgramCompileExpCounter, 3) - << "Expect 3 urProgramCompileExp calls"; - - // Verify the number of urProgramLinkExp calls: we expect 1 call which - // links the main program and fallback libraries. 
- EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + // Verify the number of urProgramCreateWithIL calls: we expect 1 call + // for main program + EXPECT_EQ(ProgramCreateWithILCounter, 1) + << "Expect 1 urProgramCreateWithIL calls"; + + // Verify the number of urProgramBuildExp calls: we expect 1 for main + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 1 urProgramBuildExp calls"; + + // Verify the number of urProgramLinkExp calls: none expected. + EXPECT_EQ(ProgramLinkExpCounter, 0) << "Expect 0 urProgramLinkExp calls"; } + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { // In case of AOT compilation, we expect 1 call to // urProgramCreateWithBinary. EXPECT_EQ(ProgramCreateWithBinaryCounter, 1) - << "Expect 3 urProgramCreateWithIL calls"; + << "Expect 1 urProgramCreateWithIL calls"; // And a single call to urProgramBuildExp. In this case libraries are // linked beforehand, so we don't compile/link them online. EXPECT_EQ(ProgramBuildExpCounter, 1) - << "Expect 0 urProgramBuildExp calls"; + << "Expect 1 urProgramBuildExp calls"; } } @@ -409,8 +362,7 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { // Test case 2 // Get bundles in executable state: for pairs of devices excluding dev4 and - // for the new set of devices which includes the dev4. This checks caching - // of the programs and device libraries. + // for the new set of devices which includes the dev4. 
// Reset counters ProgramCreateWithILCounter = 0; @@ -418,7 +370,8 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { ProgramLinkExpCounter = 0; ProgramCompileExpCounter = 0; ProgramCreateWithBinaryCounter = 0; - sycl::kernel_id KernelID = sycl::get_kernel_id(); + sycl::kernel_id KernelID = + sycl::get_kernel_id(); // Program associated with {dev1, dev2, dev3} is supposed to be cached from // the first test case, we don't expect any additional program creation and // compilation calls for the following bundles because they are all created @@ -437,24 +390,24 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { sycl::get_kernel_bundle(Context, {Dev3}, {KernelID}); EXPECT_EQ(ProgramCreateWithILCounter, 0); + EXPECT_EQ(ProgramBuildExpCounter, 0); EXPECT_EQ(ProgramCompileExpCounter, 0); EXPECT_EQ(ProgramLinkExpCounter, 0); // Next we create a bundle with a different set of devices which includes // dev4, so we expect new UR program creation. Also main program will be - // compiled for new set of devices. Each of device libraries (assert and - // math) will be additionally compiled for dev4, but no program creation is - // expected for device libraries as program handle already exists in the - // per-context cache. + // built for new set of devices. 
sycl::kernel_bundle KernelBundleExecutableNewSet = sycl::get_kernel_bundle( Context, {Dev2, Dev3, Dev4}, {KernelID}); if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { EXPECT_EQ(ProgramCreateWithILCounter, 1) << "Expect 1 urProgramCreateWithIL calls"; - EXPECT_EQ(ProgramCompileExpCounter, 3) - << "Expect 3 urProgramCompileExp calls"; - EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 1 urProgramBuildExp calls"; + EXPECT_EQ(ProgramCompileExpCounter, 0) + << "Expect 0 urProgramCompileExp calls"; + EXPECT_EQ(ProgramLinkExpCounter, 0) << "Expect 0 urProgramLinkExp calls"; } if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { @@ -467,20 +420,17 @@ TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { for (int i = 0; i < 3; i++) { Queues[0].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutableSubset1); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[0].wait(); Queues[2].submit([=](sycl::handler &cgh) { cgh.use_kernel_bundle(KernelBundleExecutableNewSet); - cgh.single_task([=]() {}); + cgh.single_task([=]() {}); }); Queues[2].wait(); } } - - // Reset the SYCL_DEVICELIB_NO_FALLBACK to its original value. 
- sycl::detail::SYCLConfig::reset(); } // The following helpers and test verify persistent cache usage when we have From 3332205105d12a4552dcadaf44de0f15cda40390 Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Mon, 22 Sep 2025 09:35:51 -0700 Subject: [PATCH 3/8] add test for partitioned wait --- .../Graph/RecordReplay/partitioned_wait.cpp | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp diff --git a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp new file mode 100644 index 0000000000000..30db4c0822ebd --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp @@ -0,0 +1,144 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests partitioned wait feature in SYCL Graph. +// This test demonstrates how queue.wait() calls during recording create +// dummy nodes that partition the graph into "before", barrier, and "after" subgraphs. 
+ +#include "../graph_common.hpp" + +#include + +int main() { + property_list Properties{property::queue::in_order{}}; + queue Queue{Properties}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 100; + int *A = malloc_device(N, Queue); + int *B = malloc_device(N, Queue); + int *C = malloc_device(N, Queue); + int *D = malloc_device(N, Queue); + + // Initialize data outside recording + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + A[it] = static_cast(it); + B[it] = 0; + C[it] = 0; + D[it] = 0; + }); + }).wait(); + + // Begin recording the graph + Graph.begin_recording(Queue); + + // Part 1: "Before" subgraph operations + auto Event1 = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + B[it] = A[it] * 2; // B = A * 2 + }); + }); + + auto Event2 = Queue.submit([&](handler &CGH) { + CGH.depends_on(Event1); + CGH.parallel_for(N, [=](id<1> it) { + C[it] = B[it] + 1; // C = B + 1 = A * 2 + 1 + }); + }); + + // This queue.wait() should create a dummy barrier node in the graph + // instead of asserting, partitioning the graph into before/after sections + Queue.wait(); + + // Part 2: "After" subgraph operations + // These operations should be in the "after" partition + auto Event3 = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + D[it] = C[it] * 3; // D = C * 3 = (A * 2 + 1) * 3 + }); + }); + + // Another wait should create another barrier + Queue.wait(); + + // More operations after second barrier + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + D[it] = D[it] + A[it]; // D = D + A = (A * 2 + 1) * 3 + A = 6*A + 3 + A = 7*A + 3 + }); + }); + + Graph.end_recording(); + + // Finalize the graph - this should trigger partitioning + auto ExecGraph = Graph.finalize(); + + // Execute the partitioned graph + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + Queue.wait_and_throw(); + + // Verify results + std::vector 
OutputA(N), OutputB(N), OutputC(N), OutputD(N); + Queue.memcpy(OutputA.data(), A, N * sizeof(int)).wait(); + Queue.memcpy(OutputB.data(), B, N * sizeof(int)).wait(); + Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); + Queue.memcpy(OutputD.data(), D, N * sizeof(int)).wait(); + + // Check results + for (size_t i = 0; i < N; i++) { + int expected_a = static_cast(i); + int expected_b = expected_a * 2; + int expected_c = expected_b + 1; + int expected_d = 7 * expected_a + 3; + + assert(check_value(i, expected_a, OutputA[i], "A")); + assert(check_value(i, expected_b, OutputB[i], "B")); + assert(check_value(i, expected_c, OutputC[i], "C")); + assert(check_value(i, expected_d, OutputD[i], "D")); + } + + // Test multiple execution of the partitioned graph + // Reset data + Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](id<1> it) { + A[it] = static_cast(i) + 10; // Different input + B[it] = 0; + C[it] = 0; + D[it] = 0; + }); + }).wait(); + + // Execute again + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + Queue.wait_and_throw(); + + // Verify second execution + Queue.memcpy(OutputA.data(), A, N * sizeof(int)).wait(); + Queue.memcpy(OutputB.data(), B, N * sizeof(int)).wait(); + Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); + Queue.memcpy(OutputD.data(), D, N * sizeof(int)).wait(); + + for (size_t i = 0; i < N; i++) { + int expected_a = static_cast(i) + 10; + int expected_b = expected_a * 2; + int expected_c = expected_b + 1; + int expected_d = 7 * expected_a + 3; + + assert(check_value(i, expected_a, OutputA[i], "A (second execution)")); + assert(check_value(i, expected_b, OutputB[i], "B (second execution)")); + assert(check_value(i, expected_c, OutputC[i], "C (second execution)")); + assert(check_value(i, expected_d, OutputD[i], "D (second execution)")); + } + + // Clean up + sycl::free(A, Queue); + sycl::free(B, Queue); + sycl::free(C, Queue); + sycl::free(D, Queue); + + return 0; +} From 
a55867cf743671204fb4280c3c64504937967fd4 Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Mon, 22 Sep 2025 11:52:20 -0700 Subject: [PATCH 4/8] fix typo --- .../Graph/RecordReplay/partitioned_wait.cpp | 25 +++++-------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp index 30db4c0822ebd..e4ef0ff783862 100644 --- a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp @@ -4,8 +4,6 @@ // RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // Tests partitioned wait feature in SYCL Graph. -// This test demonstrates how queue.wait() calls during recording create -// dummy nodes that partition the graph into "before", barrier, and "after" subgraphs. #include "../graph_common.hpp" @@ -23,7 +21,6 @@ int main() { int *C = malloc_device(N, Queue); int *D = malloc_device(N, Queue); - // Initialize data outside recording Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { A[it] = static_cast(it); @@ -39,45 +36,38 @@ int main() { // Part 1: "Before" subgraph operations auto Event1 = Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { - B[it] = A[it] * 2; // B = A * 2 + B[it] = A[it] * 2; }); }); auto Event2 = Queue.submit([&](handler &CGH) { CGH.depends_on(Event1); CGH.parallel_for(N, [=](id<1> it) { - C[it] = B[it] + 1; // C = B + 1 = A * 2 + 1 + C[it] = B[it] + 1; }); }); - // This queue.wait() should create a dummy barrier node in the graph - // instead of asserting, partitioning the graph into before/after sections + // should create a dummy barrier node in the graph Queue.wait(); // Part 2: "After" subgraph operations - // These operations should be in the "after" partition auto Event3 = Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { - D[it] = C[it] * 3; // D = C * 3 = (A * 2 + 1) * 3 
+ D[it] = C[it] * 3; }); }); - // Another wait should create another barrier Queue.wait(); - // More operations after second barrier Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { - D[it] = D[it] + A[it]; // D = D + A = (A * 2 + 1) * 3 + A = 6*A + 3 + A = 7*A + 3 + D[it] = D[it] + A[it]; }); }); Graph.end_recording(); - // Finalize the graph - this should trigger partitioning auto ExecGraph = Graph.finalize(); - - // Execute the partitioned graph Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); Queue.wait_and_throw(); @@ -88,7 +78,6 @@ int main() { Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); Queue.memcpy(OutputD.data(), D, N * sizeof(int)).wait(); - // Check results for (size_t i = 0; i < N; i++) { int expected_a = static_cast(i); int expected_b = expected_a * 2; @@ -101,18 +90,16 @@ int main() { assert(check_value(i, expected_d, OutputD[i], "D")); } - // Test multiple execution of the partitioned graph // Reset data Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { - A[it] = static_cast(i) + 10; // Different input + A[it] = static_cast(it) + 10; // Different input B[it] = 0; C[it] = 0; D[it] = 0; }); }).wait(); - // Execute again Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); Queue.wait_and_throw(); From 69f001f64cdc566ecce31c04095336c2ead3d0fd Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Mon, 22 Sep 2025 12:49:04 -0700 Subject: [PATCH 5/8] add initial implementation with barriers --- sycl/source/detail/queue_impl.cpp | 30 +++++++++++++++++-- .../Graph/RecordReplay/partitioned_wait.cpp | 8 ++--- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 50fd63b05b291..0b7fc24b08d66 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -890,9 +893,30 
@@ void queue_impl::wait(const detail::code_location &CodeLoc) { #endif if (!MGraph.expired()) { - throw sycl::exception(make_error_code(errc::invalid), - "wait cannot be called for a queue which is " - "recording to a command graph."); + auto GraphImpl = MGraph.lock(); + + // Add a barrier node to the graph to create a partition point. + // TODO: test if partitioned wait bits are set + if (GraphImpl) { + std::vector EmptyWaitList; + auto BarrierCG = std::make_shared( + std::move(EmptyWaitList), + ext::oneapi::experimental::event_mode_enum::none, + detail::CG::StorageInitHelper{}, + detail::CGType::BarrierWaitlist, + CodeLoc + ); + + std::vector EmptyDeps; + ext::oneapi::experimental::detail::node_impl &BarrierNode = GraphImpl->add( + ext::oneapi::experimental::node_type::ext_oneapi_barrier, + std::static_pointer_cast(BarrierCG), + EmptyDeps + ); + + GraphImpl->setBarrierDep(shared_from_this(), BarrierNode); + } + return; } // If there is an external event set, we know we are using an in-order queue diff --git a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp index e4ef0ff783862..695e7a3fd8ce0 100644 --- a/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/partitioned_wait.cpp @@ -82,7 +82,7 @@ int main() { int expected_a = static_cast(i); int expected_b = expected_a * 2; int expected_c = expected_b + 1; - int expected_d = 7 * expected_a + 3; + int expected_d = expected_c * 3 + expected_a; assert(check_value(i, expected_a, OutputA[i], "A")); assert(check_value(i, expected_b, OutputB[i], "B")); @@ -90,7 +90,7 @@ int main() { assert(check_value(i, expected_d, OutputD[i], "D")); } - // Reset data + // Reset data and verify with new input Queue.submit([&](handler &CGH) { CGH.parallel_for(N, [=](id<1> it) { A[it] = static_cast(it) + 10; // Different input @@ -103,7 +103,6 @@ int main() { Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); 
Queue.wait_and_throw(); - // Verify second execution Queue.memcpy(OutputA.data(), A, N * sizeof(int)).wait(); Queue.memcpy(OutputB.data(), B, N * sizeof(int)).wait(); Queue.memcpy(OutputC.data(), C, N * sizeof(int)).wait(); @@ -113,7 +112,7 @@ int main() { int expected_a = static_cast(i) + 10; int expected_b = expected_a * 2; int expected_c = expected_b + 1; - int expected_d = 7 * expected_a + 3; + int expected_d = expected_c * 3 + expected_a; assert(check_value(i, expected_a, OutputA[i], "A (second execution)")); assert(check_value(i, expected_b, OutputB[i], "B (second execution)")); @@ -121,7 +120,6 @@ int main() { assert(check_value(i, expected_d, OutputD[i], "D (second execution)")); } - // Clean up sycl::free(A, Queue); sycl::free(B, Queue); sycl::free(C, Queue); From 5274d1e76c6bd9bfce6b95f476efa64204c9ea77 Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Mon, 22 Sep 2025 14:12:15 -0700 Subject: [PATCH 6/8] add alternative non-barrier implementation --- sycl/source/detail/queue_impl.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 0b7fc24b08d66..da39485972ab4 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -898,6 +898,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // Add a barrier node to the graph to create a partition point. 
// TODO: test if partitioned wait bits are set if (GraphImpl) { +//#define PARTITIONED_WAIT_IMPL_BARRIERS 1 +#define PARTITIONED_WAIT_IMPL_EMPTY_NODE 1 +#if PARTITIONED_WAIT_IMPL_BARRIERS std::vector EmptyWaitList; auto BarrierCG = std::make_shared( std::move(EmptyWaitList), @@ -915,6 +918,20 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { ); GraphImpl->setBarrierDep(shared_from_this(), BarrierNode); +#elif PARTITIONED_WAIT_IMPL_EMPTY_NODE + auto EmptyCG = std::make_shared( + detail::CGType::None, + detail::CG::StorageInitHelper{}, + CodeLoc + ); + + std::vector EmptyDeps; + ext::oneapi::experimental::detail::node_impl &EmptyNode = GraphImpl->add( + ext::oneapi::experimental::node_type::empty, + EmptyCG, + EmptyDeps + ); +#endif } return; } From 680595339bdeda80550e7b859d2d54b7cb852796 Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Wed, 1 Oct 2025 13:02:42 -0700 Subject: [PATCH 7/8] introduce new host sync node --- .../ext/oneapi/experimental/graph/node.hpp | 3 +- sycl/source/detail/event_impl.cpp | 26 +++- sycl/source/detail/graph/graph_impl.cpp | 136 +++++++++++++++--- sycl/source/detail/queue_impl.cpp | 30 +--- 4 files changed, 144 insertions(+), 51 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp index 0978fa4b8eb23..0bc4872f76641 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph/node.hpp @@ -42,7 +42,8 @@ enum class node_type { host_task = 9, native_command = 10, async_malloc = 11, - async_free = 12 + async_free = 12, + host_sync = 13 }; /// Class representing a node in the graph, returned by command_graph::add(). 
diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6b7957bf781bd..7c4822d9fc5bd 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -272,10 +272,30 @@ void event_impl::wait(bool *Success) { throw sycl::exception(make_error_code(errc::invalid), "wait method cannot be used for a discarded event."); + printf("FOOBAR3 %d\n", MGraph.expired()); + + if (!MGraph.expired()) { - throw sycl::exception(make_error_code(errc::invalid), - "wait method cannot be used for an event associated " - "with a command graph."); + auto GraphImpl = MGraph.lock(); + + // Add a host sync node to the graph to create a partition point. + // TODO: test if partitioned wait bits are set + if (GraphImpl) { + auto EmptyCG = std::make_shared( + detail::CGType::None, + detail::CG::StorageInitHelper{} + ); + + std::vector EmptyDeps; + ext::oneapi::experimental::detail::node_impl &HostSyncNode = GraphImpl->add( + ext::oneapi::experimental::node_type::host_sync, + EmptyCG, + EmptyDeps + ); + + printf("FOOBAR2\n"); + + } } #ifdef XPTI_ENABLE_INSTRUMENTATION diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 213ddc14b63a9..a76620fffc3b8 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -18,6 +18,7 @@ #include // for kernel_impl #include // ProgramManager #include // for queue_impl +#include // for debug output #include // for SYCLMemObjT #include // for stack #include // for tls_code_loc_t etc.. @@ -68,6 +69,8 @@ inline const char *nodeTypeToString(node_type NodeType) { return "async_malloc"; case node_type::async_free: return "async_free"; + case node_type::host_sync: + return "host_sync"; } assert(false && "Unhandled node type"); return {}; @@ -140,19 +143,19 @@ void propagatePartitionUp(node_impl &Node, int PartitionNum) { /// remain. /// @param Node Node to assign to the partition. /// @param PartitionNum Number to propagate. 
-/// @param HostTaskList List of host tasks that have already been processed and +/// @param CutVertexList List of tasks that have already been processed and /// are encountered as successors to the node Node. void propagatePartitionDown(node_impl &Node, int PartitionNum, - std::list &HostTaskList) { + std::list &CutVertexList) { if (Node.MCGType == sycl::detail::CGType::CodeplayHostTask) { if (Node.MPartitionNum != -1) { - HostTaskList.push_front(&Node); + CutVertexList.push_front(&Node); } return; } Node.MPartitionNum = PartitionNum; for (node_impl &Successor : Node.successors()) { - propagatePartitionDown(Successor, PartitionNum, HostTaskList); + propagatePartitionDown(Successor, PartitionNum, CutVertexList); } } @@ -180,24 +183,50 @@ void partition::updateSchedule() { void exec_graph_impl::makePartitions() { int CurrentPartition = -1; - std::list HostTaskList; + std::list CutVertexList; + +#define SYCL_GRAPH_DEBUG 1 +#ifdef SYCL_GRAPH_DEBUG + // Debug: Print total number of nodes + std::cout << "[DEBUG] makePartitions: Starting with " << MNodeStorage.size() << " nodes" << std::endl; + + // Debug: Print all nodes and their types + int nodeIndex = 0; + for (node_impl &Node : nodes()) { + std::cout << "[DEBUG] Node " << nodeIndex << ": Type=" << nodeTypeToString(Node.MNodeType) + << ", CGType=" << static_cast(Node.MCGType) << std::endl; + nodeIndex++; + } +#endif + + // A cut vertex is a node that, when removed, increases the number of connected components + // in the graph. 
In our case, cut vertices are host-tasks / sync tasks that separate partitions + auto const IsCutVertex = [](node_impl const& node) { + return node.MCGType == sycl::detail::CGType::CodeplayHostTask || + node.MNodeType == node_type::host_sync; + }; + // find all the host-tasks in the graph for (node_impl &Node : nodes()) { - if (Node.MCGType == sycl::detail::CGType::CodeplayHostTask) { - HostTaskList.push_back(&Node); + if (IsCutVertex(Node)) { + CutVertexList.push_back(&Node); } } - MContainsHostTask = HostTaskList.size() > 0; + MContainsHostTask = CutVertexList.size() > 0; +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Found " << CutVertexList.size() << " host tasks, MContainsHostTask=" + << (MContainsHostTask ? "true" : "false") << std::endl; +#endif // Annotate nodes // The first step in graph partitioning is to annotate all nodes of the graph // with a temporary partition or group number. This step allows us to group // the graph nodes into sets of nodes with kind of meta-dependencies that must // be enforced by the runtime. For example, Group 2 depends on Groups 0 and 1, // which means that we should not try to run Group 2 before Groups 0 and 1 - // have finished executing. Since host-tasks are currently the only tasks that + // have finished executing. Since host-tasks and sync-tasks are the only tasks that // require runtime dependency handling, groups of nodes are created from - // host-task nodes. We therefore loop over all the host-task nodes, and for + // these nodes. We therefore loop over all the host-task and sync-task nodes, and for // each node: // - Its predecessors are assigned to group number `n-1` // - The node itself constitutes a group, group number `n` @@ -213,23 +242,32 @@ void exec_graph_impl::makePartitions() { // case, the host-task node `A` must be reprocessed after the node `B` and the // group that includes the predecessor of `B` can be merged with the group of // the predecessors of the node `A`. 
- while (HostTaskList.size() > 0) { - node_impl &Node = *HostTaskList.front(); - HostTaskList.pop_front(); + while (CutVertexList.size() > 0) { + node_impl &Node = *CutVertexList.front(); + CutVertexList.pop_front(); +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Processing host task node, CurrentPartition=" << CurrentPartition << std::endl; +#endif CurrentPartition++; for (node_impl &Predecessor : Node.predecessors()) { propagatePartitionUp(Predecessor, CurrentPartition); } CurrentPartition++; Node.MPartitionNum = CurrentPartition; +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Assigned host task to partition " << CurrentPartition << std::endl; +#endif CurrentPartition++; - auto TmpSize = HostTaskList.size(); + auto TmpSize = CutVertexList.size(); for (node_impl &Successor : Node.successors()) { - propagatePartitionDown(Successor, CurrentPartition, HostTaskList); + propagatePartitionDown(Successor, CurrentPartition, CutVertexList); } - if (HostTaskList.size() > TmpSize) { + if (CutVertexList.size() > TmpSize) { +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Host task list size increased, merging partitions" << std::endl; +#endif // At least one HostTask has been re-numbered so group merge opportunities - for (node_impl *HT : HostTaskList) { + for (node_impl *HT : CutVertexList) { auto HTPartitionNum = HT->MPartitionNum; if (HTPartitionNum != -1) { // can merge predecessors of node `Node` with predecessors of node @@ -246,14 +284,32 @@ void exec_graph_impl::makePartitions() { } } +#ifdef SYCL_GRAPH_DEBUG + // Debug: Print node partition assignments before creating partitions + std::cout << "[DEBUG] Node partition assignments:" << std::endl; + nodeIndex = 0; + for (node_impl &Node : nodes()) { + std::cout << "[DEBUG] Node " << nodeIndex << ": Partition=" << Node.MPartitionNum + << ", Type=" << nodeTypeToString(Node.MNodeType) << std::endl; + nodeIndex++; + } +#endif + // Create partitions int PartitionFinalNum = 0; +#ifdef SYCL_GRAPH_DEBUG + std::cout << 
"[DEBUG] Creating partitions from " << -1 << " to " << CurrentPartition << std::endl; +#endif for (int i = -1; i <= CurrentPartition; i++) { const std::shared_ptr &Partition = std::make_shared(); + int nodesInPartition = 0; + int rootsInPartition = 0; for (node_impl &Node : nodes()) { if (Node.MPartitionNum == i) { + nodesInPartition++; MPartitionNodes[&Node] = PartitionFinalNum; if (isPartitionRoot(Node)) { + rootsInPartition++; Partition->MRoots.insert(&Node); if (Node.MCGType == CGType::CodeplayHostTask) { Partition->MIsHostTask = true; @@ -261,21 +317,37 @@ void exec_graph_impl::makePartitions() { } } } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Partition " << i << ": " << nodesInPartition << " nodes, " + << rootsInPartition << " roots" << std::endl; +#endif if (Partition->MRoots.size() > 0) { Partition->updateSchedule(); Partition->MIsInOrderGraph = Partition->checkIfGraphIsSinglePath(); MPartitions.push_back(Partition); MRootPartitions.push_back(Partition); +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Added partition " << PartitionFinalNum << " (original " << i + << "), IsHostTask=" << (Partition->MIsHostTask ? "true" : "false") + << ", IsInOrder=" << (Partition->MIsInOrderGraph ? "true" : "false") << std::endl; +#endif PartitionFinalNum++; } } // Add an empty partition if there is no partition, i.e. 
empty graph if (MPartitions.empty()) { +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] No partitions created, adding empty partition" << std::endl; +#endif MPartitions.push_back(std::make_shared()); MRootPartitions.push_back(MPartitions[0]); } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Final result: " << MPartitions.size() << " partitions created" << std::endl; +#endif + // Make global schedule list for (const auto &Partition : MPartitions) { MSchedule.insert(MSchedule.end(), Partition->MSchedule.begin(), @@ -283,22 +355,39 @@ void exec_graph_impl::makePartitions() { } // Compute partition dependencies + int partitionIdx = 0; for (const auto &Partition : MPartitions) { + int predecessorCount = 0; + int successorCount = 0; for (node_impl &Root : Partition->roots()) { for (node_impl &NodeDep : Root.predecessors()) { auto &Predecessor = MPartitions[MPartitionNodes[&NodeDep]]; Partition->MPredecessors.push_back(Predecessor.get()); Predecessor->MSuccessors.push_back(Partition.get()); + predecessorCount++; } } + for (auto &Succ : Partition->MSuccessors) { + successorCount++; + } +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] Partition " << partitionIdx << " dependencies: " + << predecessorCount << " predecessors, " << successorCount << " successors" << std::endl; +#endif + partitionIdx++; } // Reset node groups (if node have to be re-processed - e.g. 
subgraph) for (node_impl &Node : nodes()) { Node.MPartitionNum = -1; } + +#ifdef SYCL_GRAPH_DEBUG + std::cout << "[DEBUG] makePartitions completed" << std::endl; +#endif } + graph_impl::graph_impl(const sycl::context &SyclContext, const sycl::device &SyclDevice, const sycl::property_list &PropList) @@ -699,6 +788,7 @@ void graph_impl::beginRecordingUnlockedQueue(sycl::detail::queue_impl &Queue) { void graph_impl::beginRecording(sycl::detail::queue_impl &Queue) { graph_impl::WriteLock Lock(MMutex); + printf("Graph %p beginRecording on Queue %p\n", this, &Queue); if (!Queue.hasCommandGraph()) { Queue.setCommandGraph(shared_from_this()); addQueue(Queue); @@ -970,9 +1060,9 @@ exec_graph_impl::~exec_graph_impl() { // Clean up any execution events which have finished so we don't pass them // to the scheduler. -static void cleanupExecutionEvents(std::vector &ExecutionEvents) { +static void cleanupExecutionEvents(std::vector &ExecutionEvents) { - auto Predicate = [](EventImplPtr &EventPtr) { + auto Predicate = [](sycl::detail::EventImplPtr &EventPtr) { return EventPtr->isCompleted(); }; @@ -981,7 +1071,7 @@ static void cleanupExecutionEvents(std::vector &ExecutionEvents) { ExecutionEvents.end()); } -EventImplPtr exec_graph_impl::enqueueHostTaskPartition( +sycl::detail::EventImplPtr exec_graph_impl::enqueueHostTaskPartition( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper CGData, bool EventNeeded) { @@ -1019,7 +1109,7 @@ EventImplPtr exec_graph_impl::enqueueHostTaskPartition( return nullptr; } -EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( +sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper CGData, bool EventNeeded) { @@ -1049,7 +1139,7 @@ EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( return nullptr; } -EventImplPtr exec_graph_impl::enqueuePartitionDirectly( 
+sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionDirectly( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, std::vector &WaitEvents, bool EventNeeded) { @@ -1089,7 +1179,7 @@ EventImplPtr exec_graph_impl::enqueuePartitionDirectly( } } -EventImplPtr +sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitions(sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper &CGData, bool IsCGDataSafeForSchedulerBypass, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index da39485972ab4..d4ba46206f12f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -891,47 +891,29 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); } #endif + printf("FOOBAR4 %d\n", MGraph.expired()); + if (!MGraph.expired()) { auto GraphImpl = MGraph.lock(); - // Add a barrier node to the graph to create a partition point. // TODO: test if partitioned wait bits are set if (GraphImpl) { -//#define PARTITIONED_WAIT_IMPL_BARRIERS 1 -#define PARTITIONED_WAIT_IMPL_EMPTY_NODE 1 -#if PARTITIONED_WAIT_IMPL_BARRIERS - std::vector EmptyWaitList; - auto BarrierCG = std::make_shared( - std::move(EmptyWaitList), - ext::oneapi::experimental::event_mode_enum::none, - detail::CG::StorageInitHelper{}, - detail::CGType::BarrierWaitlist, - CodeLoc - ); - - std::vector EmptyDeps; - ext::oneapi::experimental::detail::node_impl &BarrierNode = GraphImpl->add( - ext::oneapi::experimental::node_type::ext_oneapi_barrier, - std::static_pointer_cast(BarrierCG), - EmptyDeps - ); - GraphImpl->setBarrierDep(shared_from_this(), BarrierNode); -#elif PARTITIONED_WAIT_IMPL_EMPTY_NODE auto EmptyCG = std::make_shared( detail::CGType::None, detail::CG::StorageInitHelper{}, CodeLoc ); + printf("FOOBAR1\n"); + std::vector EmptyDeps; - ext::oneapi::experimental::detail::node_impl &EmptyNode = GraphImpl->add( - ext::oneapi::experimental::node_type::empty, 
+ GraphImpl->add( + ext::oneapi::experimental::node_type::host_sync, EmptyCG, EmptyDeps ); -#endif } return; } From 25db2c87e9f80dc063405ea84de029f1b3f2e719 Mon Sep 17 00:00:00 2001 From: Adam Fidel Date: Tue, 7 Oct 2025 08:52:56 -0700 Subject: [PATCH 8/8] Remove unneeded changes --- sycl/source/detail/graph/graph_impl.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index a76620fffc3b8..9aab0f1c166f0 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -1060,9 +1060,9 @@ exec_graph_impl::~exec_graph_impl() { // Clean up any execution events which have finished so we don't pass them // to the scheduler. -static void cleanupExecutionEvents(std::vector &ExecutionEvents) { +static void cleanupExecutionEvents(std::vector &ExecutionEvents) { - auto Predicate = [](sycl::detail::EventImplPtr &EventPtr) { + auto Predicate = [](EventImplPtr &EventPtr) { return EventPtr->isCompleted(); }; @@ -1071,7 +1071,7 @@ static void cleanupExecutionEvents(std::vector &Exec ExecutionEvents.end()); } -sycl::detail::EventImplPtr exec_graph_impl::enqueueHostTaskPartition( +EventImplPtr exec_graph_impl::enqueueHostTaskPartition( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper CGData, bool EventNeeded) { @@ -1109,7 +1109,7 @@ sycl::detail::EventImplPtr exec_graph_impl::enqueueHostTaskPartition( return nullptr; } -sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( +EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper CGData, bool EventNeeded) { @@ -1139,7 +1139,7 @@ sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler( return nullptr; } -sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionDirectly( +EventImplPtr 
exec_graph_impl::enqueuePartitionDirectly( std::shared_ptr &Partition, sycl::detail::queue_impl &Queue, std::vector &WaitEvents, bool EventNeeded) { @@ -1179,7 +1179,7 @@ sycl::detail::EventImplPtr exec_graph_impl::enqueuePartitionDirectly( } } -sycl::detail::EventImplPtr +EventImplPtr exec_graph_impl::enqueuePartitions(sycl::detail::queue_impl &Queue, sycl::detail::CG::StorageInitHelper &CGData, bool IsCGDataSafeForSchedulerBypass,