From 4efb6151273d9a62791b0f50572804efe8ee8764 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 7 Mar 2025 15:06:51 +0000 Subject: [PATCH 01/12] [SYCL][NativeCPU] Build libclc target-independently. This change creates a Clang NativeCPU target to be used for creating LLVM IR for NativeCPU, builds libclc with this target, and does the initial processing of NativeCPU device code with this target. This allows the libclc to be independent of the concrete host target, allowing libclc to be re-used across different host targets, and across different ABIs for the same host target. --- clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/TargetInfo.cpp | 35 --- clang/lib/Basic/Targets.cpp | 8 + clang/lib/Basic/Targets/NativeCPU.cpp | 109 +++++++++ clang/lib/Basic/Targets/NativeCPU.h | 70 ++++++ clang/lib/Driver/Driver.cpp | 30 +-- clang/lib/Driver/OffloadBundler.cpp | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 23 +- clang/lib/Driver/ToolChains/SYCL.cpp | 24 +- clang/lib/Driver/ToolChains/SYCL.h | 30 +-- .../ClangLinkerWrapper.cpp | 10 +- .../clang-offload-deps/ClangOffloadDeps.cpp | 7 +- libclc/CMakeLists.txt | 36 +-- .../SOURCES | 0 .../minmax_helpers.ll | 0 .../integer/popcount.cl | 0 .../math/clc_sqrt.cl | 0 .../math/fma.cl | 0 .../math/helpers.h | 0 .../math/native_cos.cl | 0 .../math/native_exp.cl | 0 .../math/native_exp2.cl | 0 .../math/native_log.cl | 0 .../math/native_log10.cl | 0 .../math/native_log2.cl | 0 .../math/native_sin.cl | 0 .../math/native_sqrt.cl | 0 .../math/round.cl | 0 .../math/sqrt.cl | 0 libdevice/nativecpu_utils.cpp | 15 +- .../FixABIMuxBuiltinsSYCLNativeCPU.h | 29 --- llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt | 1 - .../FixABIMuxBuiltinsSYCLNativeCPU.cpp | 226 ------------------ .../PipelineSYCLNativeCPU.cpp | 2 - 34 files changed, 264 insertions(+), 395 deletions(-) create mode 100644 clang/lib/Basic/Targets/NativeCPU.cpp create mode 100644 clang/lib/Basic/Targets/NativeCPU.h rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/SOURCES (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/cl_khr_int64_extended_atomics/minmax_helpers.ll (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/integer/popcount.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/clc_sqrt.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/fma.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/helpers.h (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_cos.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_exp.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_exp2.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_log.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_log10.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_log2.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_sin.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/native_sqrt.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/round.cl (100%) rename libclc/libspirv/lib/{native_cpu-unknown-linux => native_cpu}/math/sqrt.cl (100%) delete mode 100644 llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h delete mode 100644 llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b6..6272b7d20112 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -108,6 +108,7 @@ add_clang_library(clangBasic Targets/MSP430.cpp Targets/Mips.cpp Targets/NVPTX.cpp + Targets/NativeCPU.cpp Targets/OSTargets.cpp Targets/PNaCl.cpp Targets/PPC.cpp diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 72d418b1228e..c0bf4e686cf0 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -545,41 +545,6 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (Opts.FakeAddressSpaceMap) AddrSpaceMap = &FakeAddrSpaceMap; - - if ((Opts.SYCLIsDevice || Opts.OpenCL) && Opts.SYCLIsNativeCPU) { - // For SYCL Native CPU we use the NVPTXAddrSpaceMap because - // we need builtins to be mangled with AS information. - // This is also enabled in OpenCL mode so that mangling - // matches when building libclc. - - static const unsigned SYCLNativeCPUASMap[] = { - 0, // Default - 1, // opencl_global - 3, // opencl_local - 4, // opencl_constant - 0, // opencl_private - 0, // opencl_generic - 1, // opencl_global_device - 1, // opencl_global_host - 1, // cuda_device - 4, // cuda_constant - 3, // cuda_shared - 1, // sycl_global - 1, // sycl_global_device - 1, // sycl_global_host - 3, // sycl_local - 0, // sycl_private - 0, // ptr32_sptr - 0, // ptr32_uptr - 0, // ptr64 - 0, // hlsl_groupshared - 0, // hlsl_constant - 20, // wasm_funcref - }; - - AddrSpaceMap = &SYCLNativeCPUASMap; - UseAddrSpaceMapMangling = true; - } } bool TargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 6085ec8c8762..7d66abbf8ba0 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -28,6 +28,7 @@ #include "Targets/MSP430.h" #include "Targets/Mips.h" #include "Targets/NVPTX.h" +#include "Targets/NativeCPU.h" #include "Targets/OSTargets.h" #include "Targets/PNaCl.h" #include "Targets/PPC.h" @@ -117,6 +118,13 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, default: return nullptr; + case llvm::Triple::UnknownArch: + /* native_cpu is only known to Clang, not to LLVM. */ + if (Triple.str() == "native_cpu") + return std::make_unique(Triple, Opts); + + return nullptr; + case llvm::Triple::arc: return std::make_unique(Triple, Opts); diff --git a/clang/lib/Basic/Targets/NativeCPU.cpp b/clang/lib/Basic/Targets/NativeCPU.cpp new file mode 100644 index 000000000000..1f3929f7c6a2 --- /dev/null +++ b/clang/lib/Basic/Targets/NativeCPU.cpp @@ -0,0 +1,109 @@ +//===--- NativeCPU.cpp - Implement NativeCPU target feature support -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements NativeCPU TargetInfo objects. +// +//===----------------------------------------------------------------------===// + +#include "NativeCPU.h" +#include + +using namespace clang; +using namespace clang::targets; + +static const unsigned NativeCPUASMap[] = { + 0, // Default + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_private + 0, // opencl_generic + 1, // opencl_global_device + 1, // opencl_global_host + 1, // cuda_device + 4, // cuda_constant + 3, // cuda_shared + 1, // sycl_global + 1, // sycl_global_device + 1, // sycl_global_host + 3, // sycl_local + 0, // sycl_private + 0, // ptr32_sptr + 0, // ptr32_uptr + 0, // ptr64 + 0, // hlsl_groupshared + 0, // hlsl_constant + 20, // wasm_funcref +}; + +NativeCPUTargetInfo::NativeCPUTargetInfo(const llvm::Triple &, + const TargetOptions &Opts) + : TargetInfo(llvm::Triple()) { + AddrSpaceMap = &NativeCPUASMap; + UseAddrSpaceMapMangling = true; + HasLegalHalfType = true; + HasFloat16 = true; + resetDataLayout("e"); + + llvm::Triple HostTriple([&] { + // Take the default target triple if no other host triple is specified so + // that system headers work. + if (Opts.HostTriple.empty()) + return llvm::sys::getDefaultTargetTriple(); + + return Opts.HostTriple; + }()); + if (HostTriple.getArch() != llvm::Triple::UnknownArch) { + HostTarget = AllocateTarget(HostTriple, Opts); + + // Copy properties from host target. + BoolWidth = HostTarget->getBoolWidth(); + BoolAlign = HostTarget->getBoolAlign(); + IntWidth = HostTarget->getIntWidth(); + IntAlign = HostTarget->getIntAlign(); + HalfWidth = HostTarget->getHalfWidth(); + HalfAlign = HostTarget->getHalfAlign(); + FloatWidth = HostTarget->getFloatWidth(); + FloatAlign = HostTarget->getFloatAlign(); + DoubleWidth = HostTarget->getDoubleWidth(); + DoubleAlign = HostTarget->getDoubleAlign(); + LongWidth = HostTarget->getLongWidth(); + LongAlign = HostTarget->getLongAlign(); + LongLongWidth = HostTarget->getLongLongWidth(); + LongLongAlign = HostTarget->getLongLongAlign(); + PointerWidth = HostTarget->getPointerWidth(LangAS::Default); + PointerAlign = HostTarget->getPointerAlign(LangAS::Default); + MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0, + /* HasNonWeakDef = */ true); + NewAlign = HostTarget->getNewAlign(); + DefaultAlignForAttributeAligned = + HostTarget->getDefaultAlignForAttributeAligned(); + SizeType = HostTarget->getSizeType(); + PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default); + IntMaxType = HostTarget->getIntMaxType(); + WCharType = HostTarget->getWCharType(); + WIntType = HostTarget->getWIntType(); + Char16Type = HostTarget->getChar16Type(); + Char32Type = HostTarget->getChar32Type(); + Int64Type = HostTarget->getInt64Type(); + SigAtomicType = HostTarget->getSigAtomicType(); + ProcessIDType = HostTarget->getProcessIDType(); + + UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); + UseZeroLengthBitfieldAlignment = + HostTarget->useZeroLengthBitfieldAlignment(); + UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); + ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); + + // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and + // we need those macros to be identical on host and device, because (among + // other things) they affect which standard library classes are defined, + // and we need all classes to be defined on both the host and device. + MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); + } +} diff --git a/clang/lib/Basic/Targets/NativeCPU.h b/clang/lib/Basic/Targets/NativeCPU.h new file mode 100644 index 000000000000..47587e27bc00 --- /dev/null +++ b/clang/lib/Basic/Targets/NativeCPU.h @@ -0,0 +1,70 @@ +//===--- NativeCPU.h - Declare NativeCPU target feature support -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares NativeCPU TargetInfo objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H + +#include "Targets.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY NativeCPUTargetInfo final : public TargetInfo { + std::unique_ptr HostTarget; + +public: + NativeCPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override { + DefineStd(Builder, "NativeCPU", Opts); + } + + ArrayRef getTargetBuiltins() const override { return {}; } + + BuiltinVaListKind getBuiltinVaListKind() const override { + if (HostTarget) + return HostTarget->getBuiltinVaListKind(); + + return TargetInfo::VoidPtrBuiltinVaList; + } + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &info) const override { + return true; + } + + std::string_view getClobbers() const override { return ""; } + + void setSupportedOpenCLOpts() override { supportAllOpenCLOpts(); } + + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + if (HostTarget) + return HostTarget->checkCallingConvention(CC); + + return TargetInfo::checkCallingConvention(CC); + } + +protected: + ArrayRef getGCCRegNames() const override { return {}; } + + ArrayRef getGCCRegAliases() const override { + return {}; + } + + bool hasBitIntType() const override { return true; } +}; + +} // namespace targets +} // namespace clang + +#endif // LLVM_CLANG_LIB_BASIC_TARGETS_NATIVECPU_H diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 06718c02121c..fe2fd04dfa8f 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -891,6 +891,10 @@ static bool isValidSYCLTriple(llvm::Triple T) { !T.hasEnvironment()) return true; + // 'native_cpu' is valid for Native CPU. + if (isSYCLNativeCPU(T)) + return true; + // Check for invalid SYCL device triple values. // Non-SPIR/SPIRV arch. if (!T.isSPIROrSPIRV()) @@ -1354,12 +1358,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, } Arch = Device->data(); UserTargetName = "amdgcn-amd-amdhsa"; - } else if (Val == "native_cpu") { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - llvm::Triple HostTriple = HostTC->getTriple(); - SYCLTriples.insert(HostTriple.normalize()); - continue; } llvm::Triple DeviceTriple(getSYCLDeviceTriple(UserTargetName)); @@ -5629,9 +5627,7 @@ class OffloadingActionBuilder final { auto IsAMDGCN = TargetTriple.isAMDGCN(); auto IsSPIR = TargetTriple.isSPIROrSPIRV(); bool IsSpirvAOT = TargetTriple.isSPIRAOT(); - const bool IsSYCLNativeCPU = - TC->getAuxTriple() && - driver::isSYCLNativeCPU(TargetTriple, *TC->getAuxTriple()); + bool IsSYCLNativeCPU = isSYCLNativeCPU(TargetTriple); for (const auto &Input : ListIndex) { if (TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga && types::isFPGA(Input->getType())) { @@ -6695,12 +6691,6 @@ class OffloadingActionBuilder final { C.getDriver().getSYCLDeviceTriple("amdgcn-amd-amdhsa"), ValidDevice->data()); UserTargetName = "amdgcn-amd-amdhsa"; - } else if (Val == "native_cpu") { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - llvm::Triple TT = HostTC->getTriple(); - SYCLTripleList.push_back(TT); - continue; } llvm::Triple TT( @@ -7239,10 +7229,6 @@ class OffloadingActionBuilder final { /// Offload deps output is then forwarded to active device action builders so /// they can add it to the device linker inputs. void addDeviceLinkDependenciesFromHost(ActionList &LinkerInputs) { - if (isSYCLNativeCPU(C.getArgs())) { - // SYCL Native CPU doesn't need deps from clang-offload-deps. - return; - } // Link image for reading dependencies from it. auto *LA = C.MakeAction(LinkerInputs, types::TY_Host_Dependencies_Image); @@ -9614,9 +9600,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( Action::OffloadKind DependentOffloadKind; if (UI.DependentOffloadKind == Action::OFK_SYCL && TargetDeviceOffloadKind == Action::OFK_None && - !(isSYCLNativeCPU(Args) && - isSYCLNativeCPU(C.getDefaultToolChain().getTriple(), - TC->getTriple()) && + !(isSYCLNativeCPU(C.getDefaultToolChain().getTriple()) && UA->getDependentActionsInfo().size() > 1)) DependentOffloadKind = Action::OFK_Host; else @@ -10675,7 +10659,7 @@ const ToolChain &Driver::getOffloadingDeviceToolChain( *this, Target, HostTC, Args, TargetDeviceOffloadKind); break; default: - if (isSYCLNativeCPU(Args)) { + if (isSYCLNativeCPU(Target)) { TC = std::make_unique(*this, Target, HostTC, Args); } diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index 8458bf0d819f..e1dd23a4c3a5 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -142,7 +142,8 @@ bool OffloadTargetInfo::isOffloadKindCompatible( } bool OffloadTargetInfo::isTripleValid() const { - return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch; + return !Triple.str().empty() && (Triple.getArch() != Triple::UnknownArch || + Triple.str() == "native_cpu"); } bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index aa2c69d38d83..70808d546662 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5420,8 +5420,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = getToolChain(); const llvm::Triple &RawTriple = TC.getTriple(); - const llvm::Triple &Triple = TC.getEffectiveTriple(); - const std::string &TripleStr = Triple.getTriple(); + llvm::Triple Triple = TC.getEffectiveTriple(); + std::string TripleStr = Triple.getTriple(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); @@ -5463,7 +5463,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool IsUsingLTO = LTOMode != LTOK_None; bool IsFPGASYCLOffloadDevice = IsSYCLDevice && Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga; - const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC); // Perform the SYCL host compilation using an external compiler if the user // requested. @@ -5522,6 +5521,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsCuda || IsHIP || IsSYCL) IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); + // Adjust for SYCL NativeCPU compilations. When compiling in device mode, we + // adjust to host after the initial compilation. + const bool IsSYCLNativeCPU = isSYCLNativeCPU(Triple); + if (IsSYCL && IsSYCLDevice && IsSYCLNativeCPU && AuxTriple && + isa(JA)) { + Triple = *AuxTriple; + TripleStr = Triple.getTriple(); + } + // C++ is not supported for IAMCU. if (IsIAMCU && types::isCXX(Input.getType())) D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; @@ -6029,6 +6037,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); } if (IsSYCLDevice && IsSYCLNativeCPU) { + CmdArgs.push_back("-Wno-override-module"); CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-sycl-native-cpu-backend"); } @@ -10338,10 +10347,6 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, TargetTripleOpt = ("llvm_" + TargetTripleOpt).str(); } - const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC); - if (IsSYCLNativeCPU) { - TargetTripleOpt = "native_cpu"; - } WrapperArgs.push_back( C.getArgs().MakeArgString(Twine("-target=") + TargetTripleOpt)); @@ -11059,7 +11064,7 @@ static bool shouldEmitOnlyKernelsAsEntryPoints(const ToolChain &TC, if (TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs, options::OPT_fsycl_remove_unused_external_funcs, false)) return false; - if (isSYCLNativeCPU(TC)) + if (isSYCLNativeCPU(Triple)) return true; // When supporting dynamic linking, non-kernels in a device image can be // called. @@ -11118,7 +11123,7 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (!Triple.isAMDGCN()) addArgs(PostLinkArgs, TCArgs, {"-emit-param-info"}); // Enable program metadata - if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(TC)) + if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(Triple)) addArgs(PostLinkArgs, TCArgs, {"-emit-program-metadata"}); if (OutputType != types::TY_LLVM_BC) { assert(OutputType == types::TY_Tempfiletable); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 7d311fa784df..789b3f59fb09 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -626,11 +626,6 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = { {"libsycl-nativecpu_utils", "internal"}}; - const bool isNativeCPU = - (driver::isSYCLNativeCPU(Args) && - driver::isSYCLNativeCPU(C.getDefaultToolChain().getTriple(), - TargetTriple)); - bool IsWindowsMSVCEnv = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); @@ -788,7 +783,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addLibraries(SYCLDeviceTsanLibs); #endif - if (isNativeCPU) + if (isSYCLNativeCPU(TargetTriple)) addLibraries(SYCLNativeCpuDeviceLibs); return LibraryList; @@ -950,7 +945,6 @@ const char *SYCL::Linker::constructLLVMLinkCommand( // instead of the original object. if (JA.isDeviceOffloading(Action::OFK_SYCL)) { bool IsRDC = !shouldDoPerObjectFileLinking(C); - const bool IsSYCLNativeCPU = isSYCLNativeCPU(this->getToolChain()); auto isNoRDCDeviceCodeLink = [&](const InputInfo &II) { if (IsRDC) return false; @@ -964,6 +958,8 @@ const char *SYCL::Linker::constructLLVMLinkCommand( const ToolChain *HostTC = C.getSingleOffloadToolChain(); const bool IsNVPTX = this->getToolChain().getTriple().isNVPTX(); const bool IsAMDGCN = this->getToolChain().getTriple().isAMDGCN(); + const bool IsSYCLNativeCPU = + isSYCLNativeCPU(this->getToolChain().getTriple()); const bool IsFPGA = this->getToolChain().getTriple().isSPIR() && this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; @@ -1115,7 +1111,8 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA, assert((getToolChain().getTriple().isSPIROrSPIRV() || getToolChain().getTriple().isNVPTX() || - getToolChain().getTriple().isAMDGCN() || isSYCLNativeCPU(Args)) && + getToolChain().getTriple().isAMDGCN() || + isSYCLNativeCPU(getToolChain().getTriple())) && "Unsupported target"); std::string SubArchName = @@ -1718,9 +1715,9 @@ static ArrayRef getUnsupportedOpts() { } // Currently supported options by SYCL NativeCPU device compilation -static inline bool SupportedByNativeCPU(const SYCLToolChain &TC, +static inline bool SupportedByNativeCPU(const llvm::Triple &Triple, const OptSpecifier &Opt) { - if (!TC.IsSYCLNativeCPU) + if (!isSYCLNativeCPU(Triple)) return false; switch (Opt.getID()) { @@ -1737,7 +1734,6 @@ static inline bool SupportedByNativeCPU(const SYCLToolChain &TC, SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) : ToolChain(D, Triple, Args), HostTC(HostTC), - IsSYCLNativeCPU(Triple == HostTC.getTriple()), SYCLInstallation(D, Triple, Args) { // Lookup binaries into the driver directory, this is used to discover any // dependent SYCL offload compilation tools. @@ -1747,7 +1743,7 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, for (OptSpecifier Opt : getUnsupportedOpts()) { if (const Arg *A = Args.getLastArg(Opt)) { // Native CPU can support options unsupported by other targets. - if (SupportedByNativeCPU(*this, Opt)) + if (SupportedByNativeCPU(getTriple(), Opt)) continue; // All sanitizer options are not currently supported, except // AddressSanitizer and MemorySanitizer and ThreadSanitizer @@ -1791,7 +1787,7 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, for (OptSpecifier UnsupportedOpt : getUnsupportedOpts()) { if (Opt.matches(UnsupportedOpt)) { // NativeCPU should allow most normal cpu options. - if (SupportedByNativeCPU(*this, Opt.getID())) + if (SupportedByNativeCPU(getTriple(), Opt.getID())) continue; if (Opt.getID() == options::OPT_fsanitize_EQ && A->getValues().size() == 1) { @@ -2186,7 +2182,7 @@ Tool *SYCLToolChain::buildBackendCompiler() const { } Tool *SYCLToolChain::buildLinker() const { - assert(getTriple().isSPIROrSPIRV() || IsSYCLNativeCPU); + assert(getTriple().isSPIROrSPIRV() || isSYCLNativeCPU(getTriple())); return new tools::SYCL::Linker(*this); } diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 66a1313fbce3..1fea680a216e 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -269,6 +269,11 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool { } // end namespace SYCL } // end namespace tools +inline bool isSYCLNativeCPU(const llvm::Triple &Triple) { + return Triple.getArch() == llvm::Triple::UnknownArch && + Triple.str() == "native_cpu"; +} + namespace toolchains { class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { @@ -309,12 +314,12 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { bool useIntegratedAs() const override { return true; } bool isPICDefault() const override { - if (this->IsSYCLNativeCPU) + if (isSYCLNativeCPU(this->getTriple())) return this->HostTC.isPICDefault(); return false; } llvm::codegenoptions::DebugInfoFormat getDefaultDebugFormat() const override { - if (this->IsSYCLNativeCPU || + if (isSYCLNativeCPU(this->getTriple()) && this->HostTC.getTriple().isWindowsMSVCEnvironment()) return this->HostTC.getDefaultDebugFormat(); return ToolChain::getDefaultDebugFormat(); @@ -337,9 +342,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { SanitizerMask getSupportedSanitizers() const override; - const bool IsSYCLNativeCPU; - - protected: Tool *buildBackendCompiler() const override; Tool *buildLinker() const override; @@ -353,24 +355,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { }; } // end namespace toolchains - -inline bool isSYCLNativeCPU(const llvm::opt::ArgList &Args) { - if (auto SYCLTargets = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { - if (SYCLTargets->containsValue("native_cpu")) - return true; - } - return false; -} - -inline bool isSYCLNativeCPU(const llvm::Triple &HostT, - const llvm::Triple &DevT) { - return HostT == DevT; -} - -inline bool isSYCLNativeCPU(const ToolChain &TC) { - const llvm::Triple *const AuxTriple = TC.getAuxTriple(); - return AuxTriple && isSYCLNativeCPU(TC.getTriple(), *AuxTriple); -} } // end namespace driver } // end namespace clang diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 73b117081b43..cff7195e30ab 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -644,7 +644,7 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args, SmallVector &PostLinkArgs, const llvm::Triple Triple) { const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); - bool SYCLNativeCPU = (HostTriple == Triple); + bool SYCLNativeCPU = Triple.str() == "native_cpu"; bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && !Triple.isSPIRAOT() && !SYCLNativeCPU); if (SpecConstsSupported) @@ -1504,7 +1504,10 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, if (!ClangPath) return ClangPath.takeError(); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + if (Triple.str() == "native_cpu") + Triple = llvm::Triple(Args.getLastArgValue(OPT_host_triple_EQ)); + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); if (Arch.empty()) Arch = "native"; @@ -1673,6 +1676,9 @@ Expected linkDevice(ArrayRef InputFiles, case Triple::loongarch64: return generic::clang(InputFiles, Args); default: + if (Triple.str() == "native_cpu" && IsSYCLKind) + return generic::clang(InputFiles, Args); + return createStringError(Triple.getArchName() + " linking is not supported"); } diff --git a/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp b/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp index 5dbf04467510..4f3e7c2d0b7b 100644 --- a/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp +++ b/clang/tools/clang-offload-deps/ClangOffloadDeps.cpp @@ -120,7 +120,8 @@ int main(int argc, const char **argv) { .Case("fpga", true) .Default(false); - bool TripleIsValid = Triple(Triples[I]).getArch() != Triple::UnknownArch; + bool TripleIsValid = Triple(Triples[I]).getArch() != Triple::UnknownArch || + Triples[I] == "native_cpu"; if (!KindIsValid || !TripleIsValid) { SmallVector Buf; @@ -211,7 +212,9 @@ int main(int argc, const char **argv) { // global variable llvm.used to represent a reference to a symbol. But for // other targets we have to create a real reference since llvm.used may // not be representable in the object file. - if (Kinds[I] == "sycl" || Triple(Triples[I]).isSPIR()) { + if (Triples[I] == "native_cpu") { + // SYCL Native CPU doesn't need deps from clang-offload-deps. + } else if (Kinds[I] == "sycl" || Triple(Triples[I]).isSPIR()) { auto *GV = new GlobalVariable( Mod, ArrayTy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ArrayTy, Used), "llvm.used"); diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 3506872d4eab..ad048501f48b 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -42,7 +42,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS libspirv/lib/amdgcn/SOURCES; libspirv/lib/ptx-nvidiacl/SOURCES; libspirv/lib/r600/SOURCES; - libspirv/lib/native_cpu-unknown-linux/SOURCES; + libspirv/lib/native_cpu/SOURCES; ) set( LIBCLC_MIN_LLVM 3.9.0 ) @@ -52,9 +52,6 @@ set( LIBCLC_TARGETS_TO_BUILD "all" option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF ) -set( LIBCLC_NATIVECPU_FLAGS_X86_64 "" - CACHE STRING "Semicolon-separated list of compiler flags for x86_64 libclc target.") - if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) # Out-of-tree configuration set( LIBCLC_STANDALONE_BUILD TRUE ) @@ -192,10 +189,10 @@ else() endforeach() endif() -option( LIBCLC_NATIVECPU_HOST_TARGET "Build libclc for Native CPU using the host triple." Off) +option( LIBCLC_NATIVECPU_HOST_TARGET "Build libclc for Native CPU." Off) if( LIBCLC_NATIVECPU_HOST_TARGET ) - list(APPEND LIBCLC_TARGETS_TO_BUILD ${LLVM_TARGET_TRIPLE}) + list(APPEND LIBCLC_TARGETS_TO_BUILD native_cpu) endif() list( SORT LIBCLC_TARGETS_TO_BUILD ) @@ -227,9 +224,7 @@ set( nvptx--nvidiacl_devices none ) set( nvptx64--nvidiacl_devices none ) set( spirv-mesa3d-_devices none ) set( spirv64-mesa3d-_devices none ) -# TODO: Does this need to be set for each possible triple? -set( x86_64-unknown-linux-gnu_devices none ) -set( aarch64-unknown-linux-gnu_devices none ) +set( native_cpu_devices none ) # Setup aliases set( cedar_aliases palm sumo sumo2 redwood juniper ) @@ -312,11 +307,9 @@ else(LIBCLC_STANDALONE_BUILD) endif(LIBCLC_STANDALONE_BUILD) file( TO_CMAKE_PATH ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/clc LIBCLC_LIBRARY_OUTPUT_INTDIR ) -set(NATIVECPU_SUPPORTED_ARCH "x86_64;aarch64") - foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) message( STATUS "libclc target '${t}' is enabled" ) - string( REPLACE "-" ";" TRIPLE ${t} ) + string( REPLACE "-" ";" TRIPLE ${t}-- ) list( GET TRIPLE 0 ARCH ) list( GET TRIPLE 1 VENDOR ) list( GET TRIPLE 2 OS ) @@ -345,16 +338,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( DARCH ${ARCH} ) endif() - set(IS_NATIVE_CPU_ARCH FALSE) - if( ARCH IN_LIST NATIVECPU_SUPPORTED_ARCH ) - set(IS_NATIVE_CPU_ARCH TRUE) - endif() - - if( IS_NATIVE_CPU_ARCH AND OS STREQUAL linux) - LIST( APPEND dirs native_cpu-unknown-linux ) - elseif( IS_NATIVE_CPU_ARCH AND NOT OS STREQUAL linux ) - message(WARNING "libclc is being built for an unsupported ARCH/OS" - " configuration, some SYCL programs may fail to build.") + if( ARCH STREQUAL native_cpu ) + LIST( APPEND dirs native_cpu ) endif() set( clc_lib_files ) @@ -421,11 +406,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # AMDGCN needs libclc to be compiled to high bc version since all atomic # clang builtins need to be accessible list( APPEND build_flags -mcpu=gfx940 -mllvm --amdgpu-oclc-reflect-enable=false ) - elseif( IS_NATIVE_CPU_ARCH ) + elseif( ARCH STREQUAL native_cpu ) list( APPEND build_flags -Xclang -fsycl-is-native-cpu ) - if( ARCH STREQUAL x86_64 ) - list( APPEND build_flags ${LIBCLC_NATIVECPU_FLAGS_X86_64}) - endif() endif() endif() @@ -464,7 +446,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( has_distinct_generic_addrspace FALSE ) elseif( ARCH STREQUAL amdgcn ) set( opt_flags -O3 --amdgpu-oclc-reflect-enable=false ) - elseif( IS_NATIVE_CPU_ARCH ) + elseif( ARCH STREQUAL native_cpu ) set( opt_flags -O3 ) set( has_distinct_generic_addrspace FALSE ) else() diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/SOURCES b/libclc/libspirv/lib/native_cpu/SOURCES similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/SOURCES rename to libclc/libspirv/lib/native_cpu/SOURCES diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/cl_khr_int64_extended_atomics/minmax_helpers.ll b/libclc/libspirv/lib/native_cpu/cl_khr_int64_extended_atomics/minmax_helpers.ll similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/cl_khr_int64_extended_atomics/minmax_helpers.ll rename to libclc/libspirv/lib/native_cpu/cl_khr_int64_extended_atomics/minmax_helpers.ll diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/integer/popcount.cl b/libclc/libspirv/lib/native_cpu/integer/popcount.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/integer/popcount.cl rename to libclc/libspirv/lib/native_cpu/integer/popcount.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/clc_sqrt.cl b/libclc/libspirv/lib/native_cpu/math/clc_sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/clc_sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/clc_sqrt.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/fma.cl b/libclc/libspirv/lib/native_cpu/math/fma.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/fma.cl rename to libclc/libspirv/lib/native_cpu/math/fma.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/helpers.h b/libclc/libspirv/lib/native_cpu/math/helpers.h similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/helpers.h rename to libclc/libspirv/lib/native_cpu/math/helpers.h diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_cos.cl b/libclc/libspirv/lib/native_cpu/math/native_cos.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_cos.cl rename to libclc/libspirv/lib/native_cpu/math/native_cos.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp.cl b/libclc/libspirv/lib/native_cpu/math/native_exp.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp.cl rename to libclc/libspirv/lib/native_cpu/math/native_exp.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp2.cl b/libclc/libspirv/lib/native_cpu/math/native_exp2.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_exp2.cl rename to libclc/libspirv/lib/native_cpu/math/native_exp2.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log.cl b/libclc/libspirv/lib/native_cpu/math/native_log.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log.cl rename to libclc/libspirv/lib/native_cpu/math/native_log.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log10.cl b/libclc/libspirv/lib/native_cpu/math/native_log10.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log10.cl rename to libclc/libspirv/lib/native_cpu/math/native_log10.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log2.cl b/libclc/libspirv/lib/native_cpu/math/native_log2.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_log2.cl rename to libclc/libspirv/lib/native_cpu/math/native_log2.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sin.cl b/libclc/libspirv/lib/native_cpu/math/native_sin.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sin.cl rename to libclc/libspirv/lib/native_cpu/math/native_sin.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sqrt.cl b/libclc/libspirv/lib/native_cpu/math/native_sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/native_sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/native_sqrt.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/round.cl b/libclc/libspirv/lib/native_cpu/math/round.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/round.cl rename to libclc/libspirv/lib/native_cpu/math/round.cl diff --git a/libclc/libspirv/lib/native_cpu-unknown-linux/math/sqrt.cl b/libclc/libspirv/lib/native_cpu/math/sqrt.cl similarity index 100% rename from libclc/libspirv/lib/native_cpu-unknown-linux/math/sqrt.cl rename to libclc/libspirv/lib/native_cpu/math/sqrt.cl diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index eb5c3ff2ebdf..2ab7ff2c7988 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -296,7 +296,20 @@ DefShuffleINTEL_All(double, f64, double) DefShuffleINTEL_All(float, f32, float) DefShuffleINTEL_All(_Float16, f16, _Float16) -// Vector versions of shuffle are generated by the FixABIBuiltinsSYCLNativeCPU pass +#define DefineShuffleVec(T, N, Sfx, MuxType) \ + using vt##T##N = sycl::vec::vector_t; \ + using vt##MuxType##N = sycl::vec::vector_t; \ + DefShuffleINTEL_All(vt##T##N, v##N##Sfx, vt##MuxType##N) + +#define DefineShuffleVec2to16(Type, Sfx, MuxType) \ + DefineShuffleVec(Type, 2, Sfx, MuxType) \ + DefineShuffleVec(Type, 4, Sfx, MuxType) \ + DefineShuffleVec(Type, 8, Sfx, MuxType) \ + DefineShuffleVec(Type, 16, Sfx, MuxType) + +DefineShuffleVec2to16(int32_t, i32, int32_t) +DefineShuffleVec2to16(uint32_t, i32, int32_t) +DefineShuffleVec2to16(float, f32, float) #define Define2ArgForward(Type, Name, Callee)\ DEVICE_EXTERNAL Type Name(Type a, Type b) { return Callee(a,b);} diff --git a/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h deleted file mode 100644 index 9eea9a87fced..000000000000 --- a/llvm/include/llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h +++ /dev/null @@ -1,29 +0,0 @@ -//===---- FixABIMuxBuiltins.h - Fixup ABI issues with called mux builtins ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Creates calls to shuffle up/down/xor mux builtins taking into account ABI of the -// SYCL functions. For now this only is used for vector variants. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" - - -namespace llvm { - -class FixABIMuxBuiltinsPass final - : public llvm::PassInfoMixin { - public: - llvm::PreservedAnalyses run(llvm::Module &, llvm::ModuleAnalysisManager &); -}; - -} // namespace llvm - diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index a0609ff22243..36ed6c901b19 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils PrepareSYCLNativeCPU.cpp RenameKernelSYCLNativeCPU.cpp ConvertToMuxBuiltinsSYCLNativeCPU.cpp - FixABIMuxBuiltinsSYCLNativeCPU.cpp FAtomicsNativeCPU.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp deleted file mode 100644 index b3ff7372b8d0..000000000000 --- a/llvm/lib/SYCLNativeCPUUtils/FixABIMuxBuiltinsSYCLNativeCPU.cpp +++ /dev/null @@ -1,226 +0,0 @@ -//===-- FixABIMuxBuiltinsSYCLNativeCPU.cpp - Fixup mux ABI issues ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Creates calls to shuffle up/down/xor mux builtins taking into account ABI of -// the SYCL functions. For now this only is used for vector variants. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -#define DEBUG_TYPE "fix-abi-mux-builtins" - -using namespace llvm; - -PreservedAnalyses FixABIMuxBuiltinsPass::run(Module &M, - ModuleAnalysisManager &AM) { - bool Changed = false; - - // Decide if a function needs updated and if so what parameters need changing, - // as well as the return value - auto FunctionNeedsFixing = - [](Function &F, - llvm::SmallVectorImpl> &Updates, - llvm::Type *&RetVal, std::string &MuxFuncNameToCall) { - if (!F.isDeclaration()) { - return false; - } - if (!F.getName().contains("__spirv_SubgroupShuffle")) { - return false; - } - Updates.clear(); - auto LIDvPos = F.getName().find("ELIDv"); - llvm::StringRef NameToMatch; - if (LIDvPos != llvm::StringRef::npos) { - // Add sizeof ELIDv to get num characters to match against - NameToMatch = F.getName().take_front(LIDvPos + 5); - } else { - return false; - } - - unsigned int StartIdx = 0; - unsigned int EndIdx = 1; - if (NameToMatch == "_Z32__spirv_SubgroupShuffleDownINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_down_"; - } else if (NameToMatch == "_Z30__spirv_SubgroupShuffleUpINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_up_"; - } else if (NameToMatch == "_Z28__spirv_SubgroupShuffleINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_"; - EndIdx = 0; - } else if (NameToMatch == "_Z31__spirv_SubgroupShuffleXorINTELIDv") { - MuxFuncNameToCall = "__mux_sub_group_shuffle_xor_"; - EndIdx = 0; - } else { - return false; - } - - // We need to create the body for this. First we need to find out what - // the first arguments should be - llvm::StringRef RemainingName = - F.getName().drop_front(NameToMatch.size()); - std::string MuxFuncTypeStr = "UNKNOWN"; - - unsigned int VecWidth = 0; - if (RemainingName.consumeInteger(10, VecWidth)) { - return false; - } - if (!RemainingName.consume_front("_")) { - return false; - } - - char TypeCh = RemainingName[0]; - Type *BaseType = nullptr; - switch (TypeCh) { - case 'a': - case 'h': - BaseType = llvm::Type::getInt8Ty(F.getContext()); - MuxFuncTypeStr = "i8"; - break; - case 's': - case 't': - BaseType = llvm::Type::getInt16Ty(F.getContext()); - MuxFuncTypeStr = "i16"; - break; - - case 'i': - case 'j': - BaseType = llvm::Type::getInt32Ty(F.getContext()); - MuxFuncTypeStr = "i32"; - break; - case 'l': - case 'm': - BaseType = llvm::Type::getInt64Ty(F.getContext()); - MuxFuncTypeStr = "i64"; - break; - case 'f': - BaseType = llvm::Type::getFloatTy(F.getContext()); - MuxFuncTypeStr = "f32"; - break; - case 'd': - BaseType = llvm::Type::getDoubleTy(F.getContext()); - MuxFuncTypeStr = "f64"; - break; - default: - return false; - } - auto *VecType = llvm::FixedVectorType::get(BaseType, VecWidth); - RetVal = VecType; - - // Work out the mux function to call's type extension based on v##N##Sfx - MuxFuncNameToCall += "v"; - MuxFuncNameToCall += std::to_string(VecWidth); - MuxFuncNameToCall += MuxFuncTypeStr; - - unsigned int CurrentIndex = 0; - for (auto &Arg : F.args()) { - if (Arg.hasStructRetAttr()) { - StartIdx++; - EndIdx++; - } else { - if (CurrentIndex >= StartIdx && CurrentIndex <= EndIdx) { - if (Arg.getType() != VecType) { - Updates.push_back(std::pair( - CurrentIndex, VecType)); - } - } - } - CurrentIndex++; - } - return true; - }; - - llvm::SmallVector FuncsToProcess; - for (auto &F : M.functions()) { - FuncsToProcess.push_back(&F); - } - - for (auto *F : FuncsToProcess) { - llvm::SmallVector, 4> ArgUpdates; - llvm::Type *RetType = nullptr; - std::string MuxFuncNameToCall; - if (!FunctionNeedsFixing(*F, ArgUpdates, RetType, MuxFuncNameToCall)) { - continue; - } - if (!F->isDeclaration()) { - continue; - } - Changed = true; - IRBuilder<> IR(BasicBlock::Create(F->getContext(), "", F)); - - llvm::SmallVector Args; - unsigned int ArgIndex = 0; - unsigned int UpdateIndex = 0; - - for (auto &Arg : F->args()) { - if (!Arg.hasStructRetAttr()) { - if (UpdateIndex < ArgUpdates.size() && - std::get<0>(ArgUpdates[UpdateIndex]) == ArgIndex) { - Args.push_back(std::get<1>(ArgUpdates[UpdateIndex])); - UpdateIndex++; - } else { - Args.push_back(Arg.getType()); - } - } - ArgIndex++; - } - - FunctionType *FT = FunctionType::get(RetType, Args, false); - Function *NewFunc = - Function::Create(FT, F->getLinkage(), MuxFuncNameToCall, M); - llvm::SmallVector CallArgs; - auto NewFuncArgItr = NewFunc->args().begin(); - Argument *SretPtr = nullptr; - for (auto &Arg : F->args()) { - if (Arg.hasStructRetAttr()) { - SretPtr = &Arg; - } else { - if (Arg.getType() != (*NewFuncArgItr).getType()) { - if (Arg.getType()->isPointerTy()) { - Value *ArgLoad = IR.CreateLoad((*NewFuncArgItr).getType(), &Arg); - CallArgs.push_back(ArgLoad); - } else { - Value *ArgCast = IR.CreateBitCast(&Arg, (*NewFuncArgItr).getType()); - CallArgs.push_back(ArgCast); - } - } else { - CallArgs.push_back(&Arg); - } - NewFuncArgItr++; - } - } - - Value *Res = IR.CreateCall(NewFunc, CallArgs); - // If the return type is different to the initial function, then bitcast it - // unless it's void in which case we'd expect an StructRet parameter which - // needs stored to. - if (F->getReturnType() != RetType) { - if (F->getReturnType()->isVoidTy()) { - // If we don't have an StructRet parameter then something is wrong with - // the initial function - if (!SretPtr) { - llvm_unreachable( - "No struct ret pointer for Sub group shuffle function"); - } - - IR.CreateStore(Res, SretPtr); - } else { - Res = IR.CreateBitCast(Res, F->getReturnType()); - } - } - if (F->getReturnType()->isVoidTy()) { - IR.CreateRetVoid(); - } else { - IR.CreateRet(Res); - } - } - - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b30b6c41c2b9..f36d29da74b6 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" -#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/SpecConstants.h" @@ -75,7 +74,6 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::PrepareBarriersPass()); MPM.addPass(compiler::utils::TransferKernelMetadataPass()); - MPM.addPass(FixABIMuxBuiltinsPass()); // Always enable vectorizer, unless explictly disabled or -O0 is set. if (OptLevel != OptimizationLevel::O0 && !SYCLNativeCPUNoVecz) { MAM.registerPass([] { return vecz::TargetInfoAnalysis(); }); From 53b96e6d55df426bd69cdf0c32666cab5037aa07 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Wed, 12 Mar 2025 22:56:34 +0000 Subject: [PATCH 02/12] More updates. * shuffle_abi tested the pass that has been removed, so the test can go too. * Other tests are adjusted as appropriate. * Remove AArch64 special case in libclc. --- clang/test/CodeGenSYCL/native_cpu_as.cpp | 16 ++-- clang/test/Driver/sycl-native-cpu-fsycl.cpp | 38 ++++---- clang/test/Driver/sycl-native-cpu.cpp | 6 +- clang/test/Driver/sycl.c | 3 +- libclc/CMakeLists.txt | 3 - .../native_cpu/shuffle_abi.cpp | 93 ------------------- .../sycl-native-cpu-libclc-linux.cpp | 2 +- .../sycl-native-cpu-libclc-windows.cpp | 2 +- 8 files changed, 34 insertions(+), 129 deletions(-) delete mode 100644 sycl/test/check_device_code/native_cpu/shuffle_abi.cpp diff --git a/clang/test/CodeGenSYCL/native_cpu_as.cpp b/clang/test/CodeGenSYCL/native_cpu_as.cpp index 9dfaa3d43b8b..62974e6d4f41 100644 --- a/clang/test/CodeGenSYCL/native_cpu_as.cpp +++ b/clang/test/CodeGenSYCL/native_cpu_as.cpp @@ -1,11 +1,11 @@ // This test is temporarily disabled for SYCL Native CPU on Windows // UNSUPPORTED: system-windows // Checks that name mangling matches between SYCL Native CPU and OpenCL when -fsycl-is-native-cpu is set -// RUN: %clang_cc1 -DCPP -fsycl-is-device -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s -// RUN: FileCheck -input-file=%t_sycl.ll %s +// RUN: %clang_cc1 -triple=native_cpu -DCPP -fsycl-is-device -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s +// RUN: FileCheck -input-file=%t_sycl.ll %s -// RUN: %clang_cc1 -x cl -DOCL -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_ocl.ll %s -// RUN: FileCheck -input-file=%t_ocl.ll %s +// RUN: %clang_cc1 -triple=native_cpu -x cl -DOCL -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_ocl.ll %s +// RUN: FileCheck -input-file=%t_ocl.ll %s #ifdef CPP #define AS_LOCAL __attribute((address_space(3))) @@ -13,7 +13,7 @@ #define AS_PRIVATE __attribute((address_space(0))) #define ATTRS [[intel::device_indirectly_callable]] #define ATTRS2 SYCL_EXTERNAL -#else +#else #ifdef OCL #define AS_LOCAL __local #define AS_GLOBAL __global @@ -29,8 +29,6 @@ ATTRS void func(AS_LOCAL int *p1, AS_GLOBAL int *p2, AS_PRIVATE int *p3){ int private_var; use_private(&private_var); } -// CHECK: define dso_local void @_Z4funcPU3AS3iPU3AS1iPi( -// CHECK: call void @_Z11use_privatePi( - - +// CHECK: define void @_Z4funcPU3AS3iPU3AS1iPi( +// CHECK: call void @_Z11use_privatePi( diff --git a/clang/test/Driver/sycl-native-cpu-fsycl.cpp b/clang/test/Driver/sycl-native-cpu-fsycl.cpp index d4dafdf89b61..9ef386d86c71 100644 --- a/clang/test/Driver/sycl-native-cpu-fsycl.cpp +++ b/clang/test/Driver/sycl-native-cpu-fsycl.cpp @@ -5,8 +5,8 @@ //RUN: %clang -fsycl --sysroot=%S/Inputs/SYCL -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -target aarch64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS-AARCH64 //Link together multiple TUs. -//RUN: touch %t_1.o -//RUN: touch %t_2.o +//RUN: touch %t_1.o +//RUN: touch %t_2.o //RUN: %clang -fsycl -fsycl-targets=native_cpu --sysroot=%S/Inputs/SYCL -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %t_1.o %t_2.o -ccc-print-bindings 2>&1 | FileCheck %s --check-prefix=CHECK_BINDINGS_MULTI_TU //CHECK_ACTIONS: +- 0: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (host-sycl) @@ -14,7 +14,7 @@ //CHECK_ACTIONS: | +- 2: input, "{{.*}}sycl-native-cpu-fsycl.cpp", c++, (device-sycl) //CHECK_ACTIONS: | +- 3: preprocessor, {2}, c++-cpp-output, (device-sycl) //CHECK_ACTIONS: |- 4: compiler, {3}, ir, (device-sycl) -//CHECK_ACTIONS: +- 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (x86_64-unknown-linux-gnu)" {4}, c++-cpp-output +//CHECK_ACTIONS: +- 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (native_cpu)" {4}, c++-cpp-output //CHECK_ACTIONS: +- 6: compiler, {5}, ir, (host-sycl) //CHECK_ACTIONS: +- 7: backend, {6}, assembler, (host-sycl) //CHECK_ACTIONS:+- 8: assembler, {7}, object, (host-sycl) @@ -27,7 +27,7 @@ //this is where we compile the device code to a shared lib, and we link the host shared lib and the device shared lib //CHECK_ACTIONS:| +- [[VAL81:.*]]: backend, {[[NCPULINK]]}, assembler, (device-sycl) //CHECK_ACTIONS:| +- [[VAL82:.*]]: assembler, {[[VAL81]]}, object, (device-sycl) -//CHECK_ACTIONS:|- [[VAL822:.*]]: offload, "device-sycl (x86_64-unknown-linux-gnu)" {[[VAL82]]}, object +//CHECK_ACTIONS:|- [[VAL822:.*]]: offload, "device-sycl (native_cpu)" {[[VAL82]]}, object //call sycl-post-link and clang-offload-wrapper //CHECK_ACTIONS:| +- [[VAL83:.*]]: sycl-post-link, {[[LINKALL]]}, tempfiletable, (device-sycl) //CHECK_ACTIONS:| +- [[VAL84:.*]]: clang-offload-wrapper, {[[VAL83]]}, object, (device-sycl) @@ -44,24 +44,26 @@ //CHECK_BINDINGS:# "{{.*}}" - "offload wrapper", inputs: ["[[TABLEFILE]].table"], output: "[[WRAPPEROBJ:.*]].o" //CHECK_BINDINGS:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[HOSTOBJ]].o", "[[KERNELOBJ]].o", "[[WRAPPEROBJ]].o"], output: "a.{{.*}}" -//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__" +//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__" //CHECK_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}} //CHECK_INVO:{{.*}}clang{{.*}}"-x" "ir" //CHECK_INVO:{{.*}}sycl-post-link{{.*}}"-emit-program-metadata" // checks that the device and host triple is correct in the generated actions when it is set explicitly -//CHECK_ACTIONS-AARCH64: +- 5: offload, "host-sycl (aarch64-unknown-linux-gnu)" {1}, "device-sycl (aarch64-unknown-linux-gnu)" {4}, c++-cpp-output -//CHECK_ACTIONS-AARCH64:|- 16: offload, "device-sycl (aarch64-unknown-linux-gnu)" {15}, object -//CHECK_ACTIONS-AARCH64:|- 19: offload, "device-sycl (aarch64-unknown-linux-gnu)" {18}, object +//CHECK_ACTIONS-AARCH64: +- 5: offload, "host-sycl (aarch64-unknown-linux-gnu)" {1}, "device-sycl (native_cpu)" {4}, c++-cpp-output +//CHECK_ACTIONS-AARCH64:|- 16: offload, "device-sycl (native_cpu)" {15}, object +//CHECK_ACTIONS-AARCH64:|- 19: offload, "device-sycl (native_cpu)" {18}, object // checks that bindings are correct when linking together multiple TUs on native cpu -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE1HOST:.*]].o", "[[FILE1DEV:.*]].o"] -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE2HOST:.*]].o", "[[FILE2DEV:.*]].o"] -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE1DEV]].o"], output: "[[FILE1SPV:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE2DEV]].o"], output: "[[FILE2SPV:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[FILE1SPV]].bc", "[[FILE2SPV]].bc"], output: "[[LINK1:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[LINK1]].bc", "{{.*}}.bc"], output: "[[LINK2:.*]].bc" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "clang", inputs: ["{{.*}}.bc"], output: "[[KERNELO:.*]].o" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL post link", inputs: ["[[LINK2]].bc"], output: "[[POSTL:.*]].table" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload wrapper", inputs: ["[[POSTL]].table"], output: "[[WRAP:.*]].o" -//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[FILE1HOST]].o", "[[FILE2HOST]].o", "[[KERNELO]].o", "[[WRAP]].o"], output: "{{.*}}" +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE1HOST:.*\.o]]", "{{.*\.o}}"] +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE2HOST:.*\.o]]", "{{.*\.o}}"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["{{.*\.o}}", "[[FILE1DEV:.*\.o]]"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE1DEV]]"], output: "[[FILE1SPV:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["{{.*\.o}}", "[[FILE2DEV:.*\.o]]"] +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE2DEV]]"], output: "[[FILE2SPV:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL::Linker", inputs: ["[[FILE1SPV]]", "[[FILE2SPV]]"], output: "[[LINK1:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL::Linker", inputs: ["[[LINK1]]", "{{.*\.bc}}"], output: "[[LINK2:.*\.bc]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "clang", inputs: ["{{.*}}.bc"], output: "[[KERNELO:.*\.o]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "SYCL post link", inputs: ["[[LINK2]]"], output: "[[POSTL:.*\.table]]" +//CHECK_BINDINGS_MULTI_TU:# "native_cpu" - "offload wrapper", inputs: ["[[POSTL]]"], output: "[[WRAP:.*\.o]]" +//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[FILE1HOST]]", "[[FILE2HOST]]", "[[KERNELO]]", "[[WRAP]]"], output: "{{.*}}" diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 8664d8023e19..9d483257ed57 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -2,11 +2,11 @@ // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64 -// checks that the host and device triple are the same, and that the sycl-native-cpu LLVM option is set -// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// checks that the host triple is native_cpu, the device triple is set, and that the sycl-native-cpu LLVM option is set +// CHECK: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:.*]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" // checks that the target triples are set correctly when the target is set explicitly -// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// CHECK-AARCH64: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s // CHECK-LINUX: {{.*}}"-fsycl-is-device"{{.*}}"-dwarf-version=[[DVERSION:.*]]" "-debugger-tuning=gdb" diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c index a3abba9292d9..cc94c73188c7 100644 --- a/clang/test/Driver/sycl.c +++ b/clang/test/Driver/sycl.c @@ -145,6 +145,7 @@ // RUN: | FileCheck -check-prefix=DEBUG-WIN %s // RUN: %clang_cl -### -fsycl -Zi -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=DEBUG-WIN %s -// DEBUG-WIN: {{.*}}"-fsycl-is-device"{{.*}}"-gcodeview" +// DEBUG-WIN: {{.*}}"-fsycl-is-device" +// DEBUG-WIN-NOT: "-gcodeview" // DEBUG-WIN: {{.*}}"-fsycl-is-host"{{.*}}"-gcodeview" // DEBUG-WIN-NOT: dwarf-version diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index ad048501f48b..3437d995fc90 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -468,9 +468,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) "+__opencl_c_3d_image_writes," "+__opencl_c_images," "+cl_khr_3d_image_writes") - if( ARCH STREQUAL "aarch64") - string( APPEND CL_3_0_EXTENSIONS ",+cl_clang_storage_class_specifiers,+__opencl_c_fp64,+cl_khr_int64_base_atomics" ) - endif() if( supports_generic_addrspace ) string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" ) if( has_distinct_generic_addrspace ) diff --git a/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp b/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp deleted file mode 100644 index 8a94745f0810..000000000000 --- a/sycl/test/check_device_code/native_cpu/shuffle_abi.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// REQUIRES: native_cpu_ock && linux - -// This doesn't test every possible case since it is quite slow to compile. -// long and double are not tested as it seems to generate loops in the code -// rather than vector versions. - -// RUN: %clangxx -DTYPE=int -DVEC_WIDTH=2 -DOPER=TF_SHIFT_UP -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix UP_V2_INT %s -// RUN: %clangxx -DTYPE=short -DVEC_WIDTH=4 -DOPER=TF_SHIFT_DOWN -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix DOWN_V4_SHORT %s -// RUN: %clangxx -DTYPE=char -DVEC_WIDTH=4 -DOPER=TF_SHIFT_XOR -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix XOR_V4_CHAR %s -// RUN: %clangxx -DTYPE=float -DVEC_WIDTH=8 -DOPER=TF_SHIFT_UP -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix UP_V8_FLOAT %s -// RUN: %clangxx -DTYPE="unsigned int" -DVEC_WIDTH=8 -DOPER=TF_SELECT -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck --check-prefix SELECT_V8_SELECT_I32 %s - -// Tests that sub-group shuffles work even when abi is different to what is -// expected - -#include - -static constexpr size_t NumElems = VEC_WIDTH; -static constexpr size_t NumWorkItems = 64; - -// UP_V2_INT: double @_Z30__spirv_SubgroupShuffleUpINTELIDv2_iET_S1_S1_j(double noundef %[[ARG0:[0-9]+]], double noundef %[[ARG1:[0-9]+]] -// UP_V2_INT: %[[UPV2I32_BITCAST_OP0:[0-9]+]] = bitcast double %[[ARG0]] to <2 x i32> -// UP_V2_INT: %[[UPV2I32_BITCAST_OP1:[0-9]+]] = bitcast double %[[ARG1]] to <2 x i32> -// UP_V2_INT: %[[UPV2I32_CALL_SHUFFLE:[0-9]+]] = call <2 x i32> @__mux_sub_group_shuffle_up_v2i32(<2 x i32> %[[UPV2I32_BITCAST_OP0]], <2 x i32> %[[UPV2I32_BITCAST_OP1]] -// UP_V2_INT: %[[UPV2I32_BITCAST_RESULT:[0-9]+]] = bitcast <2 x i32> %[[UPV2I32_CALL_SHUFFLE]] to double -// UP_V2_INT: ret double %[[UPV2I32_BITCAST_RESULT]] - -// DOWN_V4_SHORT: double @_Z32__spirv_SubgroupShuffleDownINTELIDv4_sET_S1_S1_j(double noundef %[[ARG0:[0-9]+]], double noundef %[[ARG1:[0-9]+]] -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_OP0:[0-9]+]] = bitcast double %[[ARG0]] to <4 x i16> -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_OP1:[0-9]+]] = bitcast double %[[ARG1]] to <4 x i16> -// DOWN_V4_SHORT: %[[DOWNV4I16_CALL_SHUFFLE:[0-9]+]] = call <4 x i16> @__mux_sub_group_shuffle_down_v4i16(<4 x i16> %[[DOWNV4I16_BITCAST_OP0]], <4 x i16> %[[DOWNV4I16_BITCAST_OP1]] -// DOWN_V4_SHORT: %[[DOWNV4I16_BITCAST_RESULT:[0-9]+]] = bitcast <4 x i16> %[[DOWNV4I16_CALL_SHUFFLE]] to double -// DOWN_V4_SHORT: ret double %[[DOWNV4I16_BITCAST_RESULT]] - -// XOR_V4_CHAR: i32 @_Z31__spirv_SubgroupShuffleXorINTELIDv4_aET_S1_j(i32 noundef %[[ARG0:[0-9]+]], i32 -// XOR_V4_CHAR: %[[XORV4I8_BITCAST_OP0:[0-9]+]] = bitcast i32 %[[ARG0]] to <4 x i8> -// XOR_V4_CHAR: %[[XORV4I8_CALL_SHUFFLE:[0-9]+]] = call <4 x i8> @__mux_sub_group_shuffle_xor_v4i8(<4 x i8> %[[XORV4I8_BITCAST_OP0]], i32 -// XOR_V4_CHAR: %[[XORV4I8_BITCAST_RESULT:[0-9]+]] = bitcast <4 x i8> %[[XORV4I8_CALL_SHUFFLE]] to i32 -// XOR_V4_CHAR: ret i32 %[[XORV4I8_BITCAST_RESULT]] - -// UP_V8_FLOAT: <8 x float> @_Z30__spirv_SubgroupShuffleUpINTELIDv8_fET_S1_S1_j(ptr noundef byval(<8 x float>) align 32 %[[ARG0:[0-9]+]], ptr noundef byval(<8 x float>) align 32 %[[ARG1:[0-9]+]] -// UP_V8_FLOAT: %[[UPV8F32_BYVAL_LOAD_OP0:[0-9]+]] = load <8 x float>, ptr %[[ARG0]], align 32 -// UP_V8_FLOAT: %[[UPV8F32_BYVAL_LOAD_OP1:[0-9]+]] = load <8 x float>, ptr %[[ARG1]], align 32 -// UP_V8_FLOAT: %[[UPV8F32_CALL_SHUFFLE:[0-9]+]] = call <8 x float> @__mux_sub_group_shuffle_up_v8f32(<8 x float> %[[UPV8F32_BYVAL_LOAD_OP0]], <8 x float> %[[UPV8F32_BYVAL_LOAD_OP1]], i32 -// UP_V8_FLOAT: ret <8 x float> %[[UPV8F32_CALL_SHUFFLE:[0-9]+]] - -// SELECT_V8_SELECT_I32: <8 x i32> @_Z28__spirv_SubgroupShuffleINTELIDv8_jET_S1_j(ptr noundef byval(<8 x i32>) align 32 %[[ARG0:[0-9]+]], -// SELECT_V8_SELECT_I32: %[[SELV8I32_BYVAL_LOAD_OP0:[0-9]+]] = load <8 x i32>, ptr %[[ARG0]], align 32 -// SELECT_V8_SELECT_I32: %[[SELV8I32_CALL_SHUFFLE:[0-9]+]] = call <8 x i32> @__mux_sub_group_shuffle_v8i32(<8 x i32> %[[SELV8I32_BYVAL_LOAD_OP0]], i32 -// SELECT_V8_SELECT_I32: ret <8 x i32> %[[SELV8I32_CALL_SHUFFLE:[0-9]+]] - -enum TEST_FUNC_CHOICE { TF_SHIFT_DOWN, TF_SHIFT_UP, TF_SHIFT_XOR, TF_SELECT }; - -template -void ShuffleOpTest() { - sycl::queue Q; - - ShiftType ShiftRes[NumWorkItems]; - - { - sycl::buffer ShuffleResBuf{ShiftRes, NumWorkItems}; - - Q.submit([&](sycl::handler &CGH) { - sycl::accessor ShuffleRes{ShuffleResBuf, CGH, sycl::write_only}; - - CGH.parallel_for( - sycl::nd_range<1>{sycl::range<1>{NumWorkItems}, - sycl::range<1>{NumWorkItems}}, - [=](sycl::nd_item<1> It) { - int GID = It.get_global_linear_id(); - ShiftType ItemVal{0}; - for (int I = 0; I < NumElems; ++I) - ItemVal[I] = I; - - sycl::sub_group SG = It.get_sub_group(); - if (Choice == TF_SHIFT_DOWN) { - ShuffleRes[GID] = sycl::shift_group_left(SG, ItemVal); - } else if (Choice == TF_SHIFT_UP) { - ShuffleRes[GID] = sycl::shift_group_right(SG, ItemVal); - } else if (Choice == TF_SHIFT_XOR) { - ShuffleRes[GID] = sycl::permute_group_by_xor(SG, ItemVal, 1); - } else if (Choice == TF_SELECT) { - ShuffleRes[GID] = sycl::select_from_group(SG, ItemVal, 1); - } - }); - }); - } -} - -int main() { - ShuffleOpTest, OPER>(); - return 0; -} diff --git a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp index c1bcd6ad0f82..afc019849eab 100644 --- a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp +++ b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-linux.cpp @@ -2,4 +2,4 @@ // REQUIRES: native_cpu && linux // RUN: %clang -### -fsycl -fsycl-targets=native_cpu -target x86_64-unknown-linux-gnu %s 2> %t.ncpu.out // RUN: FileCheck %s --input-file %t.ncpu.out -// CHECK: {{(\\|/)}}remangled-l64-signed_char.libspirv-x86_64-unknown-linux-gnu.bc" +// CHECK: {{(\\|/)}}remangled-l64-signed_char.libspirv-native_cpu.bc" diff --git a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp index ef146e5110e0..915bf0af86b4 100644 --- a/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp +++ b/sycl/test/check_device_code/native_cpu/sycl-native-cpu-libclc-windows.cpp @@ -4,4 +4,4 @@ // Check that l32 variant is selected for Windows // RUN: %clang -### -fsycl -fsycl-targets=native_cpu -target x86_64-windows %s 2> %t-win.ncpu.out // RUN: FileCheck %s --input-file %t-win.ncpu.out --check-prefix=CHECK-WIN -// CHECK-WIN: {{(\\|/)}}remangled-l32-signed_char.libspirv-x86_64-unknown-windows-msvc.bc" +// CHECK-WIN: {{(\\|/)}}remangled-l32-signed_char.libspirv-native_cpu.bc" From 8e51a35fc7e3bbd44a2ba989180d16e3d6f074ea Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Thu, 13 Mar 2025 11:12:44 +0000 Subject: [PATCH 03/12] Add comments, add check. * Hopefully explain better how the NativeCPU compilation process works. * Add a check to clang/test/Driver/sycl-native-cpu.cpp that we suppress warnings about module target mismatch. --- clang/lib/Driver/ToolChains/Clang.cpp | 7 +++++-- clang/test/Driver/sycl-native-cpu.cpp | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 70808d546662..f7367736ed81 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5521,8 +5521,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsCuda || IsHIP || IsSYCL) IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); - // Adjust for SYCL NativeCPU compilations. When compiling in device mode, we - // adjust to host after the initial compilation. + // Adjust for SYCL NativeCPU compilations. When compiling in device mode, the + // first compilation uses the NativeCPU target for LLVM IR generation, the + // second compilation uses the host target for machine code generation. const bool IsSYCLNativeCPU = isSYCLNativeCPU(Triple); if (IsSYCL && IsSYCLDevice && IsSYCLNativeCPU && AuxTriple && isa(JA)) { @@ -6037,6 +6038,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); } if (IsSYCLDevice && IsSYCLNativeCPU) { + // NativeCPU generates an initial LLVM module for an unknown target, then + // compiles that for host. Avoid generating a warning for that. CmdArgs.push_back("-Wno-override-module"); CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-sycl-native-cpu-backend"); diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 9d483257ed57..90f32d1aca8d 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -3,7 +3,7 @@ // checks that the host triple is native_cpu, the device triple is set, and that the sycl-native-cpu LLVM option is set -// CHECK: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:.*]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// CHECK: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "{{.*}}" "-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" // checks that the target triples are set correctly when the target is set explicitly // CHECK-AARCH64: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" @@ -19,8 +19,11 @@ // CHECK-WIN-NOT: dwarf // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled +// Also check that we suppress warnings about the intentional module mismatch // RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s // CHECK-OPTS-NOT: -sycl-opt +// CHECK-OPTS: "-Wno-override-module" +// CHECK-OPTS-NOT: -sycl-opt // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s // CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__" From b4a9128d7754fa579a60280dbd68511314d388a4 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Thu, 13 Mar 2025 11:17:33 +0000 Subject: [PATCH 04/12] Also check for -sycl-native-cpu-backend being passed. --- clang/test/Driver/sycl-native-cpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 90f32d1aca8d..f35162b9f69e 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -19,10 +19,10 @@ // CHECK-WIN-NOT: dwarf // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled -// Also check that we suppress warnings about the intentional module mismatch +// Also check that we pass the expected backend uptions. // RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s // CHECK-OPTS-NOT: -sycl-opt -// CHECK-OPTS: "-Wno-override-module" +// CHECK-OPTS: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" // CHECK-OPTS-NOT: -sycl-opt // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s From f23dc503e18f9484d43b79f38c54b9944ba49697 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Thu, 13 Mar 2025 14:43:13 +0000 Subject: [PATCH 05/12] Check that -Wno-override-module -mllvm -sycl-native-cpu-backend appear on the correct line --- clang/test/Driver/sycl-native-cpu.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index f35162b9f69e..16de5a9461bf 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -21,8 +21,10 @@ // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled // Also check that we pass the expected backend uptions. // RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s +// CHECK-OPTS: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:[^"]*]]" +// CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device" // CHECK-OPTS-NOT: -sycl-opt -// CHECK-OPTS: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" +// CHECK-OPTS-SAME: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" // CHECK-OPTS-NOT: -sycl-opt // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s From 430802a8964997a93ff681238653d487e2c189ff Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Mon, 17 Mar 2025 12:44:57 +0000 Subject: [PATCH 06/12] Drop redundant native_cpu from dirs. This is getting added through ${DARCH} already. --- libclc/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 3437d995fc90..9074e258fdef 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -338,10 +338,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( DARCH ${ARCH} ) endif() - if( ARCH STREQUAL native_cpu ) - LIST( APPEND dirs native_cpu ) - endif() - set( clc_lib_files ) libclc_configure_lib_source( clc_lib_files From 1dc268b3384e10b81a925570a30bb5a40efa4721 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 21 Mar 2025 10:21:19 +0000 Subject: [PATCH 07/12] Preserve host target args --- clang/lib/Driver/Compilation.cpp | 4 +++- clang/test/Driver/sycl-native-cpu.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index a07e81892372..1a91cbaa998d 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Driver/Compilation.h" +#include "ToolChains/SYCL.h" #include "clang/Basic/LLVM.h" #include "clang/Driver/Action.h" #include "clang/Driver/Driver.h" @@ -127,7 +128,8 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, if (DeviceOffloadKind == Action::OFK_OpenMP || DeviceOffloadKind == Action::OFK_SYCL) { const ToolChain *HostTC = getSingleOffloadToolChain(); - bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple()); + bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple()) || + isSYCLNativeCPU(TC->getTriple()); OffloadArgs = TC->TranslateOffloadTargetArgs( *TranslatedArgs, SameTripleAsHost, AllocatedArgs, DeviceOffloadKind); } diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 16de5a9461bf..1180932447ee 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -19,13 +19,15 @@ // CHECK-WIN-NOT: dwarf // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled -// Also check that we pass the expected backend uptions. -// RUN: %clang -fsycl -fsycl-targets=native_cpu -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s +// Also check that we pass the expected backend options. +// RUN: %clang -fsycl -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu -march=armv9.4-a -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s // CHECK-OPTS: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:[^"]*]]" // CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device" // CHECK-OPTS-NOT: -sycl-opt // CHECK-OPTS-SAME: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" // CHECK-OPTS-NOT: -sycl-opt +// CHECK-OPTS-SAME: "-target-feature" "+v9.4a" +// CHECK-OPTS-NOT: -sycl-opt // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s // CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__" From 737de6691ff600803ca4b253cf46d3097b8fe8f0 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 21 Mar 2025 15:06:03 +0000 Subject: [PATCH 08/12] Change -target options to --target=. --- clang/test/Driver/sycl-native-cpu.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 1180932447ee..6279052266ef 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -### 2>&1 | FileCheck %s -// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64 +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64 // checks that the host triple is native_cpu, the device triple is set, and that the sycl-native-cpu LLVM option is set @@ -8,19 +8,19 @@ // checks that the target triples are set correctly when the target is set explicitly // CHECK-AARCH64: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" -// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s // CHECK-LINUX: {{.*}}"-fsycl-is-device"{{.*}}"-dwarf-version=[[DVERSION:.*]]" "-debugger-tuning=gdb" // CHECK-LINUX-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-dwarf-version=[[DVERSION]]" "-debugger-tuning=gdb" // CHECK-LINUX-NOT: codeview -// RUN: %clang -### -target x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s +// RUN: %clang -### --target=x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s // CHECK-WIN: {{.*}}"-fsycl-is-device"{{.*}}"-gcodeview" // CHECK-WIN-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-gcodeview" // CHECK-WIN-NOT: dwarf // checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled // Also check that we pass the expected backend options. -// RUN: %clang -fsycl -fsycl-targets=native_cpu -target aarch64-unknown-linux-gnu -march=armv9.4-a -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s +// RUN: %clang -fsycl -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu -march=armv9.4-a -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s // CHECK-OPTS: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:[^"]*]]" // CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device" // CHECK-OPTS-NOT: -sycl-opt From 24072726fd0cddc0be19974eea6304ea10adaaef Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 21 Mar 2025 15:14:30 +0000 Subject: [PATCH 09/12] Drop unnecessary check for no -sycl-opt after -Wno-override-module. --- clang/test/Driver/sycl-native-cpu.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index 6279052266ef..dccb20cec166 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -25,9 +25,7 @@ // CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device" // CHECK-OPTS-NOT: -sycl-opt // CHECK-OPTS-SAME: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend" -// CHECK-OPTS-NOT: -sycl-opt // CHECK-OPTS-SAME: "-target-feature" "+v9.4a" -// CHECK-OPTS-NOT: -sycl-opt // RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s // CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__" From 35b40430ed26581cad32965e60edd2e89cb3f27a Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 25 Mar 2025 12:32:18 +0000 Subject: [PATCH 10/12] [NFC] Adjust comment style, use LangASMap. --- clang/lib/Basic/Targets.cpp | 2 +- clang/lib/Basic/Targets/NativeCPU.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 7d66abbf8ba0..128599cada56 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -119,7 +119,7 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, return nullptr; case llvm::Triple::UnknownArch: - /* native_cpu is only known to Clang, not to LLVM. */ + // native_cpu is only known to Clang, not to LLVM. if (Triple.str() == "native_cpu") return std::make_unique(Triple, Opts); diff --git a/clang/lib/Basic/Targets/NativeCPU.cpp b/clang/lib/Basic/Targets/NativeCPU.cpp index 1f3929f7c6a2..685cf9093645 100644 --- a/clang/lib/Basic/Targets/NativeCPU.cpp +++ b/clang/lib/Basic/Targets/NativeCPU.cpp @@ -16,7 +16,7 @@ using namespace clang; using namespace clang::targets; -static const unsigned NativeCPUASMap[] = { +static const LangASMap NativeCPUASMap = { 0, // Default 1, // opencl_global 3, // opencl_local @@ -78,8 +78,8 @@ NativeCPUTargetInfo::NativeCPUTargetInfo(const llvm::Triple &, LongLongAlign = HostTarget->getLongLongAlign(); PointerWidth = HostTarget->getPointerWidth(LangAS::Default); PointerAlign = HostTarget->getPointerAlign(LangAS::Default); - MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0, - /* HasNonWeakDef = */ true); + MinGlobalAlign = HostTarget->getMinGlobalAlign(/*TypeSize=*/0, + /*HasNonWeakDef=*/true); NewAlign = HostTarget->getNewAlign(); DefaultAlignForAttributeAligned = HostTarget->getDefaultAlignForAttributeAligned(); From abe8b208b793465db5629f6dfb58b00cf52b4fe3 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 8 Apr 2025 14:35:38 +0100 Subject: [PATCH 11/12] clang-format --- clang/lib/Basic/Targets/NativeCPU.h | 4 +++- clang/lib/Driver/Driver.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/Basic/Targets/NativeCPU.h b/clang/lib/Basic/Targets/NativeCPU.h index 1bac5cb439a2..44106cd8d028 100644 --- a/clang/lib/Basic/Targets/NativeCPU.h +++ b/clang/lib/Basic/Targets/NativeCPU.h @@ -29,7 +29,9 @@ class LLVM_LIBRARY_VISIBILITY NativeCPUTargetInfo final : public TargetInfo { DefineStd(Builder, "NativeCPU", Opts); } - SmallVector getTargetBuiltins() const override { return {}; } + SmallVector getTargetBuiltins() const override { + return {}; + } BuiltinVaListKind getBuiltinVaListKind() const override { if (HostTarget) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f15cd9c3589f..b222313be9d3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -10566,8 +10566,8 @@ const ToolChain &Driver::getOffloadToolChain( break; default: if (Kind == Action::OFK_SYCL && isSYCLNativeCPU(Target)) - TC = std::make_unique(*this, Target, - *HostTC, Args); + TC = std::make_unique(*this, Target, *HostTC, + Args); break; } } From b18fd5a9ba8ca4091ad3035d2bdd00657c546203 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 8 Apr 2025 15:52:16 +0100 Subject: [PATCH 12/12] Silent conflict with last pulldown: OffloadTargetInfo::isTripleValid() now needs to check for normalized triple. --- clang/lib/Driver/OffloadBundler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index cc811fba74ac..714fd9d37aea 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -178,7 +178,7 @@ bool OffloadTargetInfo::isOffloadKindCompatible( bool OffloadTargetInfo::isTripleValid() const { return !Triple.str().empty() && (Triple.getArch() != Triple::UnknownArch || - Triple.str() == "native_cpu"); + Triple.str() == "native_cpu---"); } bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const {