From a76316bfb104234c4d011db6f0d1b6f8f29880d4 Mon Sep 17 00:00:00 2001 From: Ajay Kumar Patel <131133949+ajay-fuji@users.noreply.github.com> Date: Wed, 21 Feb 2024 14:14:46 +0530 Subject: [PATCH] Enable ARM(SVE) CPU support with reference backend (#2614) --- .ci/env/openblas.sh | 2 +- .ci/env/tbb.sh | 83 +++++++ .ci/scripts/build.bat | 2 +- .ci/scripts/build.sh | 20 ++ .ci/scripts/test.sh | 15 +- cmake/scripts/generate_config.cmake | 3 +- cmake/templates/oneDALConfig.cmake.in | 4 +- .../algorithm_container_base_batch.h | 8 + .../algorithm_container_base_common.h | 19 +- cpp/daal/include/services/daal_defines.h | 11 + cpp/daal/include/services/env_detect.h | 18 +- .../internal/aarch64/aarch64_kernel_defines.h | 41 ++++ .../services/internal/daal_kernel_defines.h | 83 +------ .../internal/x86_64/x86_64_kernel_defines.h | 96 ++++++++ .../algorithms/algorithm_hyperparameter.cpp | 5 +- .../algorithms/covariance/covariance_impl.i | 9 +- ...ication_predict_dense_default_batch_impl.i | 9 +- cpp/daal/src/algorithms/kernel_config.h | 35 ++- cpp/daal/src/algorithms/kernel_inst_arm.h | 71 ++++++ cpp/daal/src/algorithms/kernel_inst_x86.h | 28 --- .../algorithms/qr/qr_dense_default_pcl_impl.i | 19 ++ cpp/daal/src/externals/service_dispatch.h | 58 +++-- .../compiler/generic/env_detect_features.cpp | 54 +++-- cpp/daal/src/services/env_detect.cpp | 12 +- cpp/daal/src/services/service_defines.h | 31 ++- cpp/daal/src/services/service_topo.h | 6 +- cpp/daal/src/threading/export_lnxarm.ref.def | 63 ++++++ .../cpu/finalize_compute_kernel_dense.cpp | 11 +- .../cpu/partial_compute_kernel_dense.cpp | 9 +- .../parameters/cpu/compute_parameters.cpp | 10 +- .../cpu/vertex_similarity_default_kernel.hpp | 4 +- ...ertex_similarity_default_kernel_avx512.hpp | 3 +- .../vertex_similarity_default_kernel_cpu.cpp | 1 + .../vertex_similarity_default_kernel_skx.cpp | 5 + .../backend/cpu/finalize_train_kernel_cov.cpp | 11 +- .../backend/cpu/partial_train_kernel_cov.cpp | 12 +- 
.../algo/pca/backend/cpu/train_kernel_cov.cpp | 12 +- .../backend/cpu/compiler_adapt.hpp | 23 ++ .../backend/cpu/intersection_tc.hpp | 5 + cpp/oneapi/dal/backend/dispatcher.cpp | 9 + cpp/oneapi/dal/backend/dispatcher.hpp | 23 ++ cpp/oneapi/dal/backend/dispatcher_cpu.hpp | 14 ++ cpp/oneapi/dal/backend/interop/common.hpp | 8 + cpp/oneapi/dal/backend/micromkl/macro.hpp | 23 ++ .../primitives/intersection/intersection.hpp | 5 + cpp/oneapi/dal/detail/dispatcher.hpp | 14 +- cpp/oneapi/dal/detail/policy.hpp | 14 ++ deploy/local/dal | 10 +- deploy/local/vars_lnx.sh | 16 +- deploy/nuget/prepare_dal_nuget.sh | 13 +- deploy/pkg-config/generate_pkgconfig.py | 28 ++- dev/bazel/config/cpudetect.cpp | 42 ++-- dev/download_tbb.sh | 1 + ...lr.clang.mkl.mk => cmplr.clang.mkl.32e.mk} | 2 +- ...lr.clang.ref.mk => cmplr.clang.ref.32e.mk} | 2 +- dev/make/cmplr.clang.ref.arm.mk | 45 ++++ dev/make/cmplr.dpcpp.mk | 2 +- ...{cmplr.gnu.mkl.mk => cmplr.gnu.mkl.32e.mk} | 2 +- ...{cmplr.gnu.ref.mk => cmplr.gnu.ref.32e.mk} | 2 +- dev/make/cmplr.gnu.ref.arm.mk | 48 ++++ ...{cmplr.icc.mkl.mk => cmplr.icc.mkl.32e.mk} | 2 +- ...{cmplr.icx.mkl.mk => cmplr.icx.mkl.32e.mk} | 2 +- .../{cmplr.vc.mkl.mk => cmplr.vc.mkl.32e.mk} | 0 dev/make/identify_os.sh | 10 +- examples/cmake/setup_examples.cmake | 13 +- makefile | 212 ++++++++++++++---- 66 files changed, 1220 insertions(+), 253 deletions(-) create mode 100755 .ci/env/tbb.sh create mode 100644 cpp/daal/include/services/internal/aarch64/aarch64_kernel_defines.h create mode 100644 cpp/daal/include/services/internal/x86_64/x86_64_kernel_defines.h create mode 100644 cpp/daal/src/algorithms/kernel_inst_arm.h create mode 100644 cpp/daal/src/threading/export_lnxarm.ref.def rename dev/make/{cmplr.clang.mkl.mk => cmplr.clang.mkl.32e.mk} (97%) rename dev/make/{cmplr.clang.ref.mk => cmplr.clang.ref.32e.mk} (98%) create mode 100644 dev/make/cmplr.clang.ref.arm.mk rename dev/make/{cmplr.gnu.mkl.mk => cmplr.gnu.mkl.32e.mk} (98%) rename dev/make/{cmplr.gnu.ref.mk => 
cmplr.gnu.ref.32e.mk} (98%) create mode 100644 dev/make/cmplr.gnu.ref.arm.mk rename dev/make/{cmplr.icc.mkl.mk => cmplr.icc.mkl.32e.mk} (97%) rename dev/make/{cmplr.icx.mkl.mk => cmplr.icx.mkl.32e.mk} (96%) rename dev/make/{cmplr.vc.mkl.mk => cmplr.vc.mkl.32e.mk} (100%) diff --git a/.ci/env/openblas.sh b/.ci/env/openblas.sh index f154c5463df..2a2e8ddf448 100755 --- a/.ci/env/openblas.sh +++ b/.ci/env/openblas.sh @@ -16,7 +16,7 @@ #=============================================================================== sudo apt-get update -sudo apt-get install build-essential gcc gfortran +sudo apt-get -y install build-essential gcc gfortran git clone https://github.com/xianyi/OpenBLAS.git CoreCount=$(lscpu -p | grep -Ev '^#' | wc -l) pushd OpenBLAS diff --git a/.ci/env/tbb.sh b/.ci/env/tbb.sh new file mode 100755 index 00000000000..3b6a991647c --- /dev/null +++ b/.ci/env/tbb.sh @@ -0,0 +1,83 @@ +#!/bin/bash +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# Function to display help +show_help() { + echo "Usage: $0 [-h]" + echo " -h Display this information" + echo " Set CC and CXX environment variables to change the compiler. Default is GNU." +} + +# Check for command-line options +while getopts ":h" opt; do + case $opt in + h) + show_help + exit 0 + ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + show_help + exit 1 + ;; + esac +done + +# Set default values for CXX and CC +CXX="${CXX:-g++}" +CC="${CC:-gcc}" + +echo "CXX is set to: $CXX" +echo "CC is set to: $CC" + +TBB_VERSION="v2021.10.0" + +arch=$(uname -m) +if [ "${arch}" == "x86_64" ]; then + arch_dir="intel64" +elif [ "${arch}" == "aarch64" ]; then + arch_dir="arm" +else + arch_dir=${arch} +fi + +sudo apt-get update +sudo apt-get install build-essential gcc gfortran cmake -y +git clone --depth 1 --branch ${TBB_VERSION} https://github.com/oneapi-src/oneTBB.git onetbb-src + +CoreCount=$(lscpu -p | grep -Ev '^#' | wc -l) + +rm -rf __deps/tbb +pushd onetbb-src +mkdir build +pushd build +cmake -DCMAKE_CXX_COMPILER=${CXX} -DCMAKE_BUILD_TYPE=Release -DTBB_TEST=OFF -DTBB_STRICT_PROTOTYPES=OFF -DCMAKE_INSTALL_PREFIX=../../__deps/tbb .. +make -j${CoreCount} +make install +popd +popd +rm -rf onetbb-src + +pushd __deps/tbb + mkdir -p lnx + mv lib/ lnx/ + mv include/ lnx/ + pushd lnx + mkdir -p lib/${arch_dir}/gcc4.8 + mv lib/libtbb* lib/${arch_dir}/gcc4.8 + popd +popd diff --git a/.ci/scripts/build.bat b/.ci/scripts/build.bat index f002b5e755e..d218464707c 100644 --- a/.ci/scripts/build.bat +++ b/.ci/scripts/build.bat @@ -40,4 +40,4 @@ call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Buil echo make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 -cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -P cmake\scripts\generate_config.cmake +cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -DARCH_DIR=intel64 -P cmake\scripts\generate_config.cmake diff --git a/.ci/scripts/build.sh b/.ci/scripts/build.sh index 8d47bbe9655..62b3623a3fe 100755 --- a/.ci/scripts/build.sh +++ b/.ci/scripts/build.sh @@ -1,6 +1,7 @@ #! 
/bin/bash #=============================================================================== # Copyright 2019 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -47,7 +48,17 @@ PLATFORM=$(bash dev/make/identify_os.sh) OS=${PLATFORM::3} ARCH=${PLATFORM:3:3} +if [[ "${ARCH}" == "32e" ]] +then optimizations=${optimizations:-avx2} +elif [[ "${ARCH}" == "arm" ]] +then +optimizations=${optimizations:-sve} +else +echo "Unknown architecture '${ARCH}'" +exit 1 +fi + backend_config=${backend_config:-mkl} GLOBAL_RETURN=0 @@ -97,7 +108,16 @@ elif [ "${backend_config}" == "ref" ]; then else echo "Not supported backend env" fi + +#TBB setup +if [[ "${ARCH}" == "32e" ]] +then $(pwd)/dev/download_tbb.sh +elif [[ "${ARCH}" == "arm" ]] +then +$(pwd)/.ci/env/tbb.sh +fi + echo "Calling make" make ${target:-daal_c} ${make_op} \ COMPILER=${compiler} \ diff --git a/.ci/scripts/test.sh b/.ci/scripts/test.sh index d014eb9ede4..b9856cf8554 100755 --- a/.ci/scripts/test.sh +++ b/.ci/scripts/test.sh @@ -1,6 +1,7 @@ #! /bin/bash #=============================================================================== # Copyright 2019 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -54,7 +55,17 @@ TESTING_RETURN=0 PLATFORM=$(bash dev/make/identify_os.sh) OS=${PLATFORM::3} ARCH=${PLATFORM:3:3} -full_arch=intel64 +if [ "$ARCH" == "32e" ]; then + full_arch=intel64 + arch_dir=intel_intel64 +elif [ "$ARCH" == "arm" ]; then + full_arch=arm + arch_dir=arm_aarch64 +else + echo "Unknown architecture ${ARCH} detected for platform ${PLATFORM}" + exit 1 +fi + build_system=${build_system:-cmake} backend=${backend:-mkl} @@ -161,7 +172,7 @@ for link_mode in ${link_modes}; do fi output_result= err= - cmake_results_dir="_cmake_results/intel_intel64_${lib_ext}" + cmake_results_dir="_cmake_results/${arch_dir}_${lib_ext}" for p in ${cmake_results_dir}/*; do e=$(basename "$p") ${p} 2>&1 > ${e}.res diff --git a/cmake/scripts/generate_config.cmake b/cmake/scripts/generate_config.cmake index fb04a832435..a891736dd46 100644 --- a/cmake/scripts/generate_config.cmake +++ b/cmake/scripts/generate_config.cmake @@ -1,5 +1,6 @@ #=============================================================================== # Copyright 2021 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,7 +20,7 @@ set(DAL_ROOT_REL_PATH "../../..") set(INC_REL_PATH "include") set(LIB_REL_PATH "lib") set(DLL_REL_PATH "redist") -set(SUB_DIR "intel64") +set(ARCH_DIR_ONEDAL "${ARCH_DIR_ONEDAL}") # Parse version info if possible if (NOT "$ENV{DALROOT}" STREQUAL "") diff --git a/cmake/templates/oneDALConfig.cmake.in b/cmake/templates/oneDALConfig.cmake.in index 26ce3143a2b..73a63b625e7 100644 --- a/cmake/templates/oneDALConfig.cmake.in +++ b/cmake/templates/oneDALConfig.cmake.in @@ -209,14 +209,14 @@ foreach (_dal_component ${DAL_LIBS}) find_library( _dal_lib NAMES "${LIB_PREFIX}${_dal_component}${LIB_EXT}" - PATH_SUFFIXES "lib/intel64" + PATH_SUFFIXES "lib/@ARCH_DIR_ONEDAL@" PATHS "${_dal_root}") elseif (${ONEDAL_LINK} STREQUAL "dynamic") add_library(oneDAL::${_dal_component} SHARED IMPORTED) find_library( _dal_lib NAMES "${LIB_PREFIX}${_dal_component}${DLL_EXT}" - PATH_SUFFIXES "lib/intel64" + PATH_SUFFIXES "lib/@ARCH_DIR_ONEDAL@" PATHS "${_dal_root}") endif() diff --git a/cpp/daal/include/algorithms/algorithm_container_base_batch.h b/cpp/daal/include/algorithms/algorithm_container_base_batch.h index 2efe6fdf942..03b3d48b16e 100644 --- a/cpp/daal/include/algorithms/algorithm_container_base_batch.h +++ b/cpp/daal/include/algorithms/algorithm_container_base_batch.h @@ -1,6 +1,7 @@ /* file: algorithm_container_base_batch.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -142,11 +143,18 @@ class AlgorithmContainerImpl : public AlgorithmContainer * \tparam sse42Container Implementation for Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) * \tparam avx2Container Implementation for Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) * \tparam avx512Container Implementation for Intel(R) Xeon(R) processors based on Intel AVX-512 + * \tparam sve Implementation for ARM processors based on Arm Scalable Vector Extension */ + +#if defined(TARGET_X86_64) template class DAAL_EXPORT AlgorithmDispatchContainer : public AlgorithmContainerImpl +#elif defined(TARGET_ARM) +template +class DAAL_EXPORT AlgorithmDispatchContainer : public AlgorithmContainerImpl +#endif { public: /** diff --git a/cpp/daal/include/algorithms/algorithm_container_base_common.h b/cpp/daal/include/algorithms/algorithm_container_base_common.h index 6d0c946fead..5f63a868634 100644 --- a/cpp/daal/include/algorithms/algorithm_container_base_common.h +++ b/cpp/daal/include/algorithms/algorithm_container_base_common.h @@ -1,6 +1,7 @@ /* file: algorithm_container_base_common.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +25,8 @@ #ifndef __ALGORITHM_CONTAINER_BASE_COMMON_H__ #define __ALGORITHM_CONTAINER_BASE_COMMON_H__ +#include "services/daal_defines.h" + #include "algorithms/algorithm_container_base.h" #include "services/error_handling.h" #include "services/internal/gpu_support_checker.h" @@ -53,8 +56,13 @@ namespace interface1 * \tparam avx2Container Implementation for Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) * \tparam avx512Container Implementation for Intel(R) Xeon(R) processors based on Intel AVX-512 */ + +#if defined(TARGET_X86_64) template +#elif defined(TARGET_ARM) +template +#endif class DAAL_EXPORT AlgorithmDispatchContainer : public AlgorithmContainerImpl { public: @@ -99,10 +107,15 @@ class DAAL_EXPORT AlgorithmDispatchContainer : public AlgorithmContainerImpl DAAL_KERNEL_SSE42_CONTAINER(ContainerTemplate, __VA_ARGS__) \ - DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, __VA_ARGS__) \ +#if defined(TARGET_X86_64) + #define __DAAL_ALGORITHM_CONTAINER(Mode, ContainerTemplate, ...) \ + algorithms::AlgorithmDispatchContainer DAAL_KERNEL_SSE42_CONTAINER( \ + ContainerTemplate, __VA_ARGS__) DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, __VA_ARGS__) \ DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, __VA_ARGS__)> +#elif defined(TARGET_ARM) + #define __DAAL_ALGORITHM_CONTAINER(Mode, ContainerTemplate, ...) 
\ + algorithms::AlgorithmDispatchContainer DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, __VA_ARGS__)> +#endif /** @} */ } // namespace interface1 diff --git a/cpp/daal/include/services/daal_defines.h b/cpp/daal/include/services/daal_defines.h index 6d43749d7c8..5415d31dcb7 100644 --- a/cpp/daal/include/services/daal_defines.h +++ b/cpp/daal/include/services/daal_defines.h @@ -1,6 +1,7 @@ /* file: daal_defines.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +29,14 @@ #include // for size_t +#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(_M_AMD64) + #define TARGET_X86_64 +#endif + +#if defined(__ARM_ARCH) || defined(__aarch64__) + #define TARGET_ARM +#endif + #if (defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)) && !defined(SYCL_LANGUAGE_VERSION) #define DAAL_INTEL_CPP_COMPILER #endif @@ -65,6 +74,8 @@ #if !defined(DAAL_INT) #if defined(_WIN64) || defined(__x86_64__) #define DAAL_INT __int64 + #elif defined(TARGET_ARM) + #define DAAL_INT __int64 #else #define DAAL_INT __int32 #endif diff --git a/cpp/daal/include/services/env_detect.h b/cpp/daal/include/services/env_detect.h index 83f4040dfac..9f6ad24fef7 100644 --- a/cpp/daal/include/services/env_detect.h +++ b/cpp/daal/include/services/env_detect.h @@ -1,6 +1,7 @@ /* file: env_detect.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,11 +43,16 @@ namespace daal */ enum CpuType { +#if defined(TARGET_X86_64) sse2 = 0, /*!< Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2) */ sse42 = 2, /*!< Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) */ avx2 = 4, /*!< Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) */ avx512 = 6, /*!< Intel(R) Xeon(R) processors based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) */ lastCpuType = avx512 +#elif defined(TARGET_ARM) + sve = 0, /*!< ARM(R) processors based on Arm's Scalable Vector Extension (SVE) */ + lastCpuType = sve +#endif }; namespace services @@ -91,7 +97,12 @@ class DAAL_EXPORT Environment : public Base enum CpuTypeEnable { cpu_default = 0, /*!< Default processor type */ - avx512 = 2 /*!< Intel(R) Xeon(R) processors based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) \DAAL_DEPRECATED */ + +#if defined(TARGET_X86_64) + avx512 = 2 /*!< Intel(R) Xeon(R) processors based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) \DAAL_DEPRECATED */ +#elif defined(TARGET_ARM) + sve = 2, /*!< ARM(R) processors based on Arm's Scalable Vector Extension (SVE) */ +#endif }; /** @@ -167,7 +178,10 @@ class DAAL_EXPORT Environment : public Base _executionContext = internal::ImplAccessor::getImplPtr(ctx); } - services::internal::sycl::ExecutionContextIface & getDefaultExecutionContext() { return *_executionContext; } + services::internal::sycl::ExecutionContextIface & getDefaultExecutionContext() + { + return *_executionContext; + } private: Environment(); diff --git a/cpp/daal/include/services/internal/aarch64/aarch64_kernel_defines.h b/cpp/daal/include/services/internal/aarch64/aarch64_kernel_defines.h new file mode 100644 index 00000000000..799525128ef --- /dev/null +++ b/cpp/daal/include/services/internal/aarch64/aarch64_kernel_defines.h @@ -0,0 +1,41 @@ +/* file: aarch64_kernel_defines.h */ +/******************************************************************************* +* Copyright contributors to the 
oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef __aarch64_KERNEL_DEFINES_H__ +#define __aarch64_KERNEL_DEFINES_H__ + +#define DAAL_KERNEL_SVE + +#if defined(DAAL_KERNEL_SVE) + #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID + #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::sve + #define DAAL_KERNEL_SVE_ONLY(something) , something + #define DAAL_KERNEL_SVE_ONLY_CODE(...) __VA_ARGS__ + #define DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sve, __VA_ARGS__) + #define DAAL_KERNEL_SVE_CONTAINER1(ContainerTemplate, ...) extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sve, __VA_ARGS__); + #define DAAL_KERNEL_SVE_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, sve, __VA_ARGS__) + #define DAAL_KERNEL_SVE_CONTAINER_CASE_SYCL(ContainerTemplate, ...) +#else + #define DAAL_KERNEL_SVE_ONLY(something) + #define DAAL_KERNEL_SVE_ONLY_CODE(...) + #define DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, ...) + #define DAAL_KERNEL_SVE_CONTAINER1(ContainerTemplate, ...) + #define DAAL_KERNEL_SVE_CONTAINER_CASE(ContainerTemplate, ...) + #define DAAL_KERNEL_SVE_CONTAINER_CASE_SYCL(ContainerTemplate, ...) 
+#endif + +#endif diff --git a/cpp/daal/include/services/internal/daal_kernel_defines.h b/cpp/daal/include/services/internal/daal_kernel_defines.h index fd631a61f3f..f4f723dfd13 100644 --- a/cpp/daal/include/services/internal/daal_kernel_defines.h +++ b/cpp/daal/include/services/internal/daal_kernel_defines.h @@ -1,6 +1,7 @@ /* file: daal_kernel_defines.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,11 +32,18 @@ * @ingroup services * @{ */ + #define DAAL_KERNEL_SSE2 #define DAAL_KERNEL_SSE42 #define DAAL_KERNEL_AVX2 #define DAAL_KERNEL_AVX512 +#if defined(TARGET_X86_64) + #include "services/internal/x86_64/x86_64_kernel_defines.h" +#elif defined(TARGET_ARM) + #include "services/internal/aarch64/aarch64_kernel_defines.h" +#endif + #define DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, cpuType, ...) ContainerTemplate<__VA_ARGS__, cpuType> #define DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, cpuType, ...) \ case cpuType: \ @@ -50,81 +58,6 @@ case cpuType: break; \ } -#if defined(DAAL_KERNEL_SSE2) - #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID - #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::sse2 - #define DAAL_KERNEL_SSE2_ONLY(something) , something - #define DAAL_KERNEL_SSE2_ONLY_CODE(...) __VA_ARGS__ - #define DAAL_KERNEL_SSE2_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse2, __VA_ARGS__) - #define DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, ...) \ - extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse2, __VA_ARGS__); - #define DAAL_KERNEL_SSE2_CONTAINER_CASE(ContainerTemplate, ...) 
DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, sse2, __VA_ARGS__) -#else - #define DAAL_KERNEL_SSE2_ONLY(something) - #define DAAL_KERNEL_SSE2_ONLY_CODE(...) - #define DAAL_KERNEL_SSE2_CONTAINER(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE2_CONTAINER_CASE(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) -#endif - -#if defined(DAAL_KERNEL_SSE42) - #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID - #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::sse42 - #define DAAL_KERNEL_SSE42_ONLY(something) , something - #define DAAL_KERNEL_SSE42_ONLY_CODE(...) __VA_ARGS__ - #define DAAL_KERNEL_SSE42_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse42, __VA_ARGS__) - #define DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, ...) \ - extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse42, __VA_ARGS__); - #define DAAL_KERNEL_SSE42_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, sse42, __VA_ARGS__) - #define DAAL_KERNEL_SSE42_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, sse42, __VA_ARGS__) -#else - #define DAAL_KERNEL_SSE42_ONLY(something) - #define DAAL_KERNEL_SSE42_ONLY_CODE(...) - #define DAAL_KERNEL_SSE42_CONTAINER(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE42_CONTAINER_CASE(ContainerTemplate, ...) - #define DAAL_KERNEL_SSE42_CONTAINER_CASE_SYCL(ContainerTemplate, ...) -#endif - -#if defined(DAAL_KERNEL_AVX2) - #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID - #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::avx2 - #define DAAL_KERNEL_AVX2_ONLY(something) , something - #define DAAL_KERNEL_AVX2_ONLY_CODE(...) __VA_ARGS__ - #define DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, ...) 
, DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx2, __VA_ARGS__) - #define DAAL_KERNEL_AVX2_CONTAINER1(ContainerTemplate, ...) \ - extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx2, __VA_ARGS__); - #define DAAL_KERNEL_AVX2_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, avx2, __VA_ARGS__) - #define DAAL_KERNEL_AVX2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, avx2, __VA_ARGS__) -#else - #define DAAL_KERNEL_AVX2_ONLY(something) - #define DAAL_KERNEL_AVX2_ONLY_CODE(...) - #define DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, ...) - #define DAAL_KERNEL_AVX2_CONTAINER1(ContainerTemplate, ...) - #define DAAL_KERNEL_AVX2_CONTAINER_CASE(ContainerTemplate, ...) - #define DAAL_KERNEL_AVX2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) -#endif - -#if defined(DAAL_KERNEL_AVX512) - #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID - #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::avx512 - #define DAAL_KERNEL_AVX512_ONLY(something) , something - #define DAAL_KERNEL_AVX512_ONLY_CODE(...) __VA_ARGS__ - #define DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx512, __VA_ARGS__) - #define DAAL_KERNEL_AVX512_CONTAINER1(ContainerTemplate, ...) \ - extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx512, __VA_ARGS__); - #define DAAL_KERNEL_AVX512_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, avx512, __VA_ARGS__) - #define DAAL_KERNEL_AVX512_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, avx512, __VA_ARGS__) -#else - #define DAAL_KERNEL_AVX512_ONLY(something) - #define DAAL_KERNEL_AVX512_ONLY_CODE(...) - #define DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, ...) - #define DAAL_KERNEL_AVX512_CONTAINER1(ContainerTemplate, ...) - #define DAAL_KERNEL_AVX512_CONTAINER_CASE(ContainerTemplate, ...) 
- #define DAAL_KERNEL_AVX512_CONTAINER_CASE_SYCL(ContainerTemplate, ...) -#endif - #define DAAL_EXPAND(...) __VA_ARGS__ /** @} */ diff --git a/cpp/daal/include/services/internal/x86_64/x86_64_kernel_defines.h b/cpp/daal/include/services/internal/x86_64/x86_64_kernel_defines.h new file mode 100644 index 00000000000..f9570309739 --- /dev/null +++ b/cpp/daal/include/services/internal/x86_64/x86_64_kernel_defines.h @@ -0,0 +1,96 @@ +/* file: x86_64_kernel_defines.h */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef __x86_64_KERNEL_DEFINES_H__ +#define __x86_64_KERNEL_DEFINES_H__ + +#if defined(DAAL_KERNEL_SSE2) + #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID + #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::sse2 + #define DAAL_KERNEL_SSE2_ONLY(something) , something + #define DAAL_KERNEL_SSE2_ONLY_CODE(...) __VA_ARGS__ + #define DAAL_KERNEL_SSE2_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse2, __VA_ARGS__) + #define DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, ...) \ + extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse2, __VA_ARGS__); + #define DAAL_KERNEL_SSE2_CONTAINER_CASE(ContainerTemplate, ...) 
DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, sse2, __VA_ARGS__) +#else + #define DAAL_KERNEL_SSE2_ONLY(something) + #define DAAL_KERNEL_SSE2_ONLY_CODE(...) + #define DAAL_KERNEL_SSE2_CONTAINER(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE2_CONTAINER_CASE(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) +#endif + +#if defined(DAAL_KERNEL_SSE42) + #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID + #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::sse42 + #define DAAL_KERNEL_SSE42_ONLY(something) , something + #define DAAL_KERNEL_SSE42_ONLY_CODE(...) __VA_ARGS__ + #define DAAL_KERNEL_SSE42_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse42, __VA_ARGS__) + #define DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, ...) \ + extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, sse42, __VA_ARGS__); + #define DAAL_KERNEL_SSE42_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, sse42, __VA_ARGS__) + #define DAAL_KERNEL_SSE42_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, sse42, __VA_ARGS__) +#else + #define DAAL_KERNEL_SSE42_ONLY(something) + #define DAAL_KERNEL_SSE42_ONLY_CODE(...) + #define DAAL_KERNEL_SSE42_CONTAINER(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE42_CONTAINER_CASE(ContainerTemplate, ...) + #define DAAL_KERNEL_SSE42_CONTAINER_CASE_SYCL(ContainerTemplate, ...) +#endif + +#if defined(DAAL_KERNEL_AVX2) + #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID + #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::avx2 + #define DAAL_KERNEL_AVX2_ONLY(something) , something + #define DAAL_KERNEL_AVX2_ONLY_CODE(...) __VA_ARGS__ + #define DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, ...) 
, DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx2, __VA_ARGS__) + #define DAAL_KERNEL_AVX2_CONTAINER1(ContainerTemplate, ...) \ + extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx2, __VA_ARGS__); + #define DAAL_KERNEL_AVX2_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, avx2, __VA_ARGS__) + #define DAAL_KERNEL_AVX2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, avx2, __VA_ARGS__) +#else + #define DAAL_KERNEL_AVX2_ONLY(something) + #define DAAL_KERNEL_AVX2_ONLY_CODE(...) + #define DAAL_KERNEL_AVX2_CONTAINER(ContainerTemplate, ...) + #define DAAL_KERNEL_AVX2_CONTAINER1(ContainerTemplate, ...) + #define DAAL_KERNEL_AVX2_CONTAINER_CASE(ContainerTemplate, ...) + #define DAAL_KERNEL_AVX2_CONTAINER_CASE_SYCL(ContainerTemplate, ...) +#endif + +#if defined(DAAL_KERNEL_AVX512) + #undef DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID + #define DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID daal::avx512 + #define DAAL_KERNEL_AVX512_ONLY(something) , something + #define DAAL_KERNEL_AVX512_ONLY_CODE(...) __VA_ARGS__ + #define DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, ...) , DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx512, __VA_ARGS__) + #define DAAL_KERNEL_AVX512_CONTAINER1(ContainerTemplate, ...) \ + extern template class DAAL_KERNEL_CONTAINER_TEMPL(ContainerTemplate, avx512, __VA_ARGS__); + #define DAAL_KERNEL_AVX512_CONTAINER_CASE(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE(ContainerTemplate, avx512, __VA_ARGS__) + #define DAAL_KERNEL_AVX512_CONTAINER_CASE_SYCL(ContainerTemplate, ...) DAAL_KERNEL_CONTAINER_CASE_SYCL(ContainerTemplate, avx512, __VA_ARGS__) +#else + #define DAAL_KERNEL_AVX512_ONLY(something) + #define DAAL_KERNEL_AVX512_ONLY_CODE(...) + #define DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, ...) + #define DAAL_KERNEL_AVX512_CONTAINER1(ContainerTemplate, ...) + #define DAAL_KERNEL_AVX512_CONTAINER_CASE(ContainerTemplate, ...) 
+ #define DAAL_KERNEL_AVX512_CONTAINER_CASE_SYCL(ContainerTemplate, ...) +#endif + +#endif diff --git a/cpp/daal/src/algorithms/algorithm_hyperparameter.cpp b/cpp/daal/src/algorithms/algorithm_hyperparameter.cpp index 17b4c923eef..81b157d79ff 100644 --- a/cpp/daal/src/algorithms/algorithm_hyperparameter.cpp +++ b/cpp/daal/src/algorithms/algorithm_hyperparameter.cpp @@ -1,6 +1,7 @@ /** file algorithm_hyperparameter.cpp */ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,10 +67,10 @@ struct HyperparameterImpl : public HyperparameterBaseImpl protected: /** Stores integer hyperparameters of the algorithm */ - HashTable _iHT; + HashTable _iHT; /** Stores floating point hyperparameters of the algorithm */ - HashTable _dHT; + HashTable _dHT; }; } // namespace internal diff --git a/cpp/daal/src/algorithms/covariance/covariance_impl.i b/cpp/daal/src/algorithms/covariance/covariance_impl.i index 24cb48524c5..0ebceeffcd7 100644 --- a/cpp/daal/src/algorithms/covariance/covariance_impl.i +++ b/cpp/daal/src/algorithms/covariance/covariance_impl.i @@ -1,6 +1,7 @@ /* file: covariance_impl.i */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,8 +136,14 @@ static inline size_t getBlockSize(size_t nrows) return 140; } +#if defined(TARGET_X86_64) + #define DAAL_CPU_TYPE avx512 +#elif defined(TARGET_ARM) + #define DAAL_CPU_TYPE sve +#endif + template <> -inline size_t getBlockSize(size_t nrows) +inline size_t getBlockSize(size_t nrows) { return (nrows > 5000 && nrows <= 50000) ? 
1024 : 140; } diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_predict_dense_default_batch_impl.i index 426457ecad7..377fffd20dd 100644 --- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_predict_dense_default_batch_impl.i @@ -1,6 +1,7 @@ /* file: df_classification_predict_dense_default_batch_impl.i */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -945,8 +946,12 @@ Status PredictClassificationTask::predictAllPointsByAllTre algorithmFPType * const res = resBD.get(); algorithmFPType * const prob = probBD.get(); daal::SafeStatus safeStat; - const size_t nRowsOfRes = _data->getNumberOfRows(); - const size_t blockSize = cpu == avx512 ? _DEFAULT_BLOCK_SIZE : _DEFAULT_BLOCK_SIZE_COMMON; + const size_t nRowsOfRes = _data->getNumberOfRows(); +#if defined(TARGET_X86_64) + const size_t blockSize = cpu == avx512 ? _DEFAULT_BLOCK_SIZE : _DEFAULT_BLOCK_SIZE_COMMON; +#elif defined(TARGET_ARM) + const size_t blockSize = cpu == sve ? 
_DEFAULT_BLOCK_SIZE : _DEFAULT_BLOCK_SIZE_COMMON; +#endif const size_t nBlocks = nRowsOfRes / blockSize; const size_t residualSize = nRowsOfRes - nBlocks * blockSize; algorithmFPType * commonBufVal = nullptr; diff --git a/cpp/daal/src/algorithms/kernel_config.h b/cpp/daal/src/algorithms/kernel_config.h index ed19658b813..e328311714f 100644 --- a/cpp/daal/src/algorithms/kernel_config.h +++ b/cpp/daal/src/algorithms/kernel_config.h @@ -1,6 +1,7 @@ /* file: kernel_config.h */ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +25,38 @@ #ifndef __KERNEL_CONFIG_H__ #define __KERNEL_CONFIG_H__ -#include "src/algorithms/kernel_inst_x86.h" +#include "services/daal_defines.h" +#include "src/services/service_defines.h" +#include "services/internal/daal_kernel_defines.h" +#include "services/internal/gpu_support_checker.h" + +#if defined(TARGET_X86_64) + #include "src/algorithms/kernel_inst_x86.h" +#elif defined(TARGET_ARM) + #include "src/algorithms/kernel_inst_arm.h" +#endif + +#define __DAAL_GET_CPUID int cpuid = daalEnv->cpuid; + +#define __DAAL_GET_CPUID_SAFE \ + int cpuid = DAAL_BASE_CPU; \ + DAAL_SAFE_CPU_CALL((cpuid = daalEnv->cpuid), (cpuid = DAAL_BASE_CPU)) + +#define __DAAL_KERNEL_MIN(a, b) ((a) < (b) ? (a) : (b)) + +#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SAFE(ContainerTemplate, Mode, ...) \ + __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID_SAFE, \ + __VA_ARGS__) + +#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER(ContainerTemplate, Mode, ...) 
\ + __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID, __VA_ARGS__) + +#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SYCL(ContainerTemplate, Mode, ...) \ + __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID, \ + __VA_ARGS__) + +#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SYCL_SAFE(ContainerTemplate, Mode, ...) \ + __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID_SAFE, \ + __VA_ARGS__) #endif diff --git a/cpp/daal/src/algorithms/kernel_inst_arm.h b/cpp/daal/src/algorithms/kernel_inst_arm.h new file mode 100644 index 00000000000..e72d94ef019 --- /dev/null +++ b/cpp/daal/src/algorithms/kernel_inst_arm.h @@ -0,0 +1,71 @@ +/* file: kernel_inst_arm.h */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/* +//++ +// The defines used for kernel allocation, deallocation, and calling kernel methods +//-- +*/ + +#ifndef __KERNEL_INST_ARM_H__ +#define __KERNEL_INST_ARM_H__ + +#define __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, ClassName, BaseClassName, GetCpuid, ...) 
\ + DAAL_KERNEL_SVE_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ + namespace interface1 \ + { \ + template <> \ + ClassName DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, __VA_ARGS__)>::ClassName( \ + daal::services::Environment::env * daalEnv) \ + : BaseClassName(daalEnv), _cntr(nullptr) \ + { \ + GetCpuid switch (__DAAL_KERNEL_MIN(DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID, cpuid)) \ + { \ + DAAL_KERNEL_SVE_CONTAINER_CASE(ContainerTemplate, __VA_ARGS__) \ + default: _cntr = (new ContainerTemplate<__VA_ARGS__, sve>(daalEnv)); break; \ + } \ + } \ + \ + template class ClassName DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, __VA_ARGS__)>; \ + } + +#define __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, ClassName, BaseClassName, GetCpuid, ...) \ + DAAL_KERNEL_SVE_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ + namespace interface1 \ + { \ + template <> \ + ClassName DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, __VA_ARGS__)>::ClassName( \ + daal::services::Environment::env * daalEnv) \ + : BaseClassName(daalEnv), _cntr(NULL) \ + { \ + GetCpuid switch (__DAAL_KERNEL_MIN(DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID, cpuid)) \ + { \ + DAAL_KERNEL_SVE_CONTAINER_CASE(ContainerTemplate, __VA_ARGS__) \ + default: \ + { \ + using cntrTemplateInst = ContainerTemplate<__VA_ARGS__, sve>; \ + static volatile services::internal::GpuSupportRegistrar registrar; \ + _cntr = (new cntrTemplateInst(daalEnv)); \ + break; \ + } \ + } \ + } \ + \ + template class ClassName DAAL_KERNEL_SVE_CONTAINER(ContainerTemplate, __VA_ARGS__)>; \ + } + +#endif diff --git a/cpp/daal/src/algorithms/kernel_inst_x86.h b/cpp/daal/src/algorithms/kernel_inst_x86.h index baf3d8d4153..1b30c74ccb1 100644 --- a/cpp/daal/src/algorithms/kernel_inst_x86.h +++ b/cpp/daal/src/algorithms/kernel_inst_x86.h @@ -24,19 +24,6 @@ #ifndef __KERNEL_INST_X86_H__ #define __KERNEL_INST_X86_H__ -#include "services/daal_defines.h" -#include "src/services/service_defines.h" -#include "services/internal/daal_kernel_defines.h" 
-#include "services/internal/gpu_support_checker.h" - -#define __DAAL_GET_CPUID int cpuid = daalEnv->cpuid; - -#define __DAAL_GET_CPUID_SAFE \ - int cpuid = DAAL_BASE_CPU; \ - DAAL_SAFE_CPU_CALL((cpuid = daalEnv->cpuid), (cpuid = DAAL_BASE_CPU)) - -#define __DAAL_KERNEL_MIN(a, b) ((a) < (b) ? (a) : (b)) - #define __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, ClassName, BaseClassName, GetCpuid, ...) \ DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ @@ -64,13 +51,6 @@ DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, __VA_ARGS__)>; \ } -#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SAFE(ContainerTemplate, Mode, ...) \ - __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID_SAFE, \ - __VA_ARGS__) - -#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER(ContainerTemplate, Mode, ...) \ - __DAAL_INSTANTIATE_DISPATCH_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID, __VA_ARGS__) - #define __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, ClassName, BaseClassName, GetCpuid, ...) \ DAAL_KERNEL_SSE2_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ DAAL_KERNEL_SSE42_CONTAINER1(ContainerTemplate, __VA_ARGS__) \ @@ -104,12 +84,4 @@ DAAL_KERNEL_AVX512_CONTAINER(ContainerTemplate, __VA_ARGS__)>; \ } -#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SYCL(ContainerTemplate, Mode, ...) \ - __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID, \ - __VA_ARGS__) - -#define __DAAL_INSTANTIATE_DISPATCH_CONTAINER_SYCL_SAFE(ContainerTemplate, Mode, ...) 
\ - __DAAL_INSTANTIATE_DISPATCH_SYCL_IMPL(ContainerTemplate, Mode, AlgorithmDispatchContainer, AlgorithmContainerImpl, __DAAL_GET_CPUID_SAFE, \ - __VA_ARGS__) - #endif diff --git a/cpp/daal/src/algorithms/qr/qr_dense_default_pcl_impl.i b/cpp/daal/src/algorithms/qr/qr_dense_default_pcl_impl.i index 84f34861fb8..ce438f8b1fa 100755 --- a/cpp/daal/src/algorithms/qr/qr_dense_default_pcl_impl.i +++ b/cpp/daal/src/algorithms/qr/qr_dense_default_pcl_impl.i @@ -1,6 +1,7 @@ /* file: qr_dense_default_pcl_impl.i */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,6 +89,8 @@ inline int * get_nblocks_array(int * size) return array; } /* rows/cols is greater or equal to: --------------------------------------------------------- 0 1 2 4 8 16 32 64 128 256 512 1K 2K ----------------------------------------------------*/ + +#if defined(TARGET_X86_64) template <> inline int * get_nblocks_array(int * size) { @@ -116,6 +119,22 @@ inline int * get_nblocks_array(int * size) *size = sizeof(array) / sizeof(int) - 1; return array; } +#elif defined(TARGET_ARM) +template <> +inline int * get_nblocks_array(int * size) +{ + static int array[] = { 1, 1, 1, 2, 4, 8, 16, 20, 24, 24, 20, 0 }; + *size = sizeof(array) / sizeof(int) - 1; + return array; +} +template <> +inline int * get_nblocks_array(int * size) +{ + static int array[] = { 1, 1, 1, 2, 4, 8, 16, 20, 20, 24, 20, 0 }; + *size = sizeof(array) / sizeof(int) - 1; + return array; +} +#endif #define QR_CHECK_BREAK(cond, error) \ if (!(cond)) \ diff --git a/cpp/daal/src/externals/service_dispatch.h b/cpp/daal/src/externals/service_dispatch.h index 0afb2d01921..9a6aef97e92 100644 --- a/cpp/daal/src/externals/service_dispatch.h +++ b/cpp/daal/src/externals/service_dispatch.h @@ -1,6 +1,7 @@ /* 
file: service_dispatch.h */ /******************************************************************************* * Copyright 2018 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,26 +27,43 @@ #include "services/internal/daal_kernel_defines.h" -#define DAAL_DISPATCH_FUNCTION_BY_CPU(func, ...) \ - switch (static_cast(daal::services::Environment::getInstance()->getCpuId())) \ - { \ - DAAL_KERNEL_SSE42_ONLY_CODE(case daal::CpuType::sse42 : func(daal::CpuType::sse42, __VA_ARGS__); break;) \ - DAAL_KERNEL_AVX2_ONLY_CODE(case daal::CpuType::avx2 : func(daal::CpuType::avx2, __VA_ARGS__); break;) \ - DAAL_KERNEL_AVX512_ONLY_CODE(case daal::CpuType::avx512 : func(daal::CpuType::avx512, __VA_ARGS__); break;) \ - DAAL_EXPAND(default : func(daal::CpuType::sse2, __VA_ARGS__); break;) \ - } +#if defined(TARGET_X86_64) + #define DAAL_DISPATCH_FUNCTION_BY_CPU(func, ...) \ + switch (static_cast(daal::services::Environment::getInstance()->getCpuId())) \ + { \ + DAAL_KERNEL_SSE42_ONLY_CODE(case daal::CpuType::sse42 : func(daal::CpuType::sse42, __VA_ARGS__); break;) \ + DAAL_KERNEL_AVX2_ONLY_CODE(case daal::CpuType::avx2 : func(daal::CpuType::avx2, __VA_ARGS__); break;) \ + DAAL_KERNEL_AVX512_ONLY_CODE(case daal::CpuType::avx512 : func(daal::CpuType::avx512, __VA_ARGS__); break;) \ + DAAL_EXPAND(default : func(daal::CpuType::sse2, __VA_ARGS__); break;) \ + } -#define DAAL_DISPATCH_FUNCTION_BY_CPU_SAFE(func, ...) 
\ - services::Status st; \ - int cpuid = daal::sse2; \ - DAAL_SAFE_CPU_CALL((cpuid = daal::services::Environment::getInstance()->getCpuId()), (cpuid = daal::sse2)) \ - switch (static_cast(cpuid)) \ - { \ - DAAL_KERNEL_SSE42_ONLY_CODE(case daal::CpuType::sse42 : st = func(daal::CpuType::sse42, __VA_ARGS__); break;) \ - DAAL_KERNEL_AVX2_ONLY_CODE(case daal::CpuType::avx2 : st = func(daal::CpuType::avx2, __VA_ARGS__); break;) \ - DAAL_KERNEL_AVX512_ONLY_CODE(case daal::CpuType::avx512 : st = func(daal::CpuType::avx512, __VA_ARGS__); break;) \ - DAAL_EXPAND(default : st = func(daal::CpuType::sse2, __VA_ARGS__); break;) \ - } \ - services::throwIfPossible(st); + #define DAAL_DISPATCH_FUNCTION_BY_CPU_SAFE(func, ...) \ + services::Status st; \ + int cpuid = daal::sse2; \ + DAAL_SAFE_CPU_CALL((cpuid = daal::services::Environment::getInstance()->getCpuId()), (cpuid = daal::sse2)) \ + switch (static_cast(cpuid)) \ + { \ + DAAL_KERNEL_SSE42_ONLY_CODE(case daal::CpuType::sse42 : st = func(daal::CpuType::sse42, __VA_ARGS__); break;) \ + DAAL_KERNEL_AVX2_ONLY_CODE(case daal::CpuType::avx2 : st = func(daal::CpuType::avx2, __VA_ARGS__); break;) \ + DAAL_KERNEL_AVX512_ONLY_CODE(case daal::CpuType::avx512 : st = func(daal::CpuType::avx512, __VA_ARGS__); break;) \ + DAAL_EXPAND(default : st = func(daal::CpuType::sse2, __VA_ARGS__); break;) \ + } \ + services::throwIfPossible(st); +#elif defined(TARGET_ARM) + #define DAAL_DISPATCH_FUNCTION_BY_CPU(func, ...) \ + switch (static_cast(daal::services::Environment::getInstance()->getCpuId())) \ + { \ + DAAL_KERNEL_SVE_ONLY_CODE(case daal::CpuType::sve : func(daal::CpuType::sve, __VA_ARGS__); break;) \ + } + #define DAAL_DISPATCH_FUNCTION_BY_CPU_SAFE(func, ...) 
\ + services::Status st; \ + int cpuid = daal::sve; \ + DAAL_SAFE_CPU_CALL((cpuid = daal::services::Environment::getInstance()->getCpuId()), (cpuid = daal::sve)) \ + switch (static_cast(cpuid)) \ + { \ + DAAL_KERNEL_SVE_ONLY_CODE(case daal::CpuType::sve : st = func(daal::CpuType::sve, __VA_ARGS__); break;) \ + } \ + services::throwIfPossible(st); +#endif #endif diff --git a/cpp/daal/src/services/compiler/generic/env_detect_features.cpp b/cpp/daal/src/services/compiler/generic/env_detect_features.cpp index ea61430b4a4..0f50e003f17 100644 --- a/cpp/daal/src/services/compiler/generic/env_detect_features.cpp +++ b/cpp/daal/src/services/compiler/generic/env_detect_features.cpp @@ -1,6 +1,7 @@ /* file: env_detect_features.cpp */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,10 +22,15 @@ //-- */ -#include - #include "services/env_detect.h" #include "services/daal_defines.h" + +#if defined(TARGET_X86_64) + #include +#elif defined(TARGET_ARM) + #include +#endif + #include "src/services/service_defines.h" #include "src/threading/threading.h" @@ -41,23 +47,24 @@ void __daal_serv_CPUHasAVX512f_enable_it_mac(); #endif +#if defined(TARGET_X86_64) void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd) { -#if defined(_MSC_VER) + #if defined(_MSC_VER) __cpuidex((int *)abcd, eax, ecx); -#else + #else uint32_t ebx, edx; - #if defined(__i386__) && defined(__PIC__) + #if defined(__i386__) && defined(__PIC__) /* in case of PIC under 32-bit EBX cannot be clobbered */ __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); - #else + #else __asm__("cpuid" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); - #endif + #endif abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx; -#endif + #endif } bool __daal_internal_is_intel_cpu() @@ -86,11 +93,11 @@ static int check_cpuid(uint32_t eax, uint32_t ecx, int abcd_index, uint32_t mask static int check_xgetbv_xcr0_ymm(uint32_t mask) { uint32_t xcr0; -#if defined(_MSC_VER) + #if defined(_MSC_VER) xcr0 = (uint32_t)_xgetbv(0); -#else + #else __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); -#endif + #endif return ((xcr0 & mask) == mask); /* checking if xmm and ymm state are enabled in XCR0 */ } @@ -187,9 +194,9 @@ DAAL_EXPORT bool __daal_serv_cpu_extensions_available() DAAL_EXPORT int __daal_serv_cpu_detect(int enable) { -#if defined(__APPLE__) + #if defined(__APPLE__) __daal_serv_CPUHasAVX512f_enable_it_mac(); -#endif + #endif if (check_avx512_features() && daal_check_is_intel_cpu()) { return daal::avx512; @@ -207,3 +214,24 @@ DAAL_EXPORT int __daal_serv_cpu_detect(int enable) return daal::sse2; } +#elif defined(TARGET_ARM) +DAAL_EXPORT bool __daal_serv_cpu_extensions_available() +{ + return 0; +} + +DAAL_EXPORT int __daal_serv_cpu_detect(int 
enable) +{ + return daal::sve; +} + +void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd) +{ + // TODO: ARM implementation for cpuid +} + +bool daal_check_is_intel_cpu() +{ + return false; +} +#endif diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 36f61d7f903..f50bd6358fa 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -1,6 +1,7 @@ /* file: env_detect.cpp */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +32,12 @@ #include "src/services/service_topo.h" #include "src/threading/service_thread_pinner.h" +#if defined(TARGET_X86_64) + #define DAAL_HOST_CPUID daal::services::Environment::avx512 +#elif defined(TARGET_ARM) + #define DAAL_HOST_CPUID daal::services::Environment::sve +#endif + static daal::services::Environment::LibraryThreadingType daal_thr_set = (daal::services::Environment::LibraryThreadingType)-1; static bool isInit = false; @@ -80,7 +87,8 @@ DAAL_EXPORT int daal::services::Environment::enableInstructionsSet(int enable) DAAL_EXPORT int daal::services::Environment::setCpuId(int cpuid) { initNumberOfThreads(); - int host_cpuid = __daal_serv_cpu_detect(daal::services::Environment::avx512); + + int host_cpuid = __daal_serv_cpu_detect(DAAL_HOST_CPUID); if (!_env.cpuid_init_flag) { @@ -90,7 +98,7 @@ DAAL_EXPORT int daal::services::Environment::setCpuId(int cpuid) if (cpuid > host_cpuid) { - _cpu_detect(daal::services::Environment::avx512); + _cpu_detect(DAAL_HOST_CPUID); } else { diff --git a/cpp/daal/src/services/service_defines.h b/cpp/daal/src/services/service_defines.h index 70af3f301ff..ce1e0cd75f5 100644 --- a/cpp/daal/src/services/service_defines.h +++ b/cpp/daal/src/services/service_defines.h @@ 
-1,6 +1,7 @@ /* file: service_defines.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +34,11 @@ DAAL_EXPORT int __daal_serv_cpu_detect(int); void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd); bool daal_check_is_intel_cpu(); -#define DAAL_BASE_CPU daal::sse2 +#if defined(TARGET_X86_64) + #define DAAL_BASE_CPU daal::sse2 +#elif defined(TARGET_ARM) + #define DAAL_BASE_CPU daal::sve +#endif #define DAAL_CHECK_CPU_ENVIRONMENT (daal_check_is_intel_cpu()) @@ -117,18 +122,26 @@ enum DataFormat } // namespace daal /* CPU comparison macro */ -#define __sse2__ (0) -#define __sse42__ (2) -#define __avx2__ (4) -#define __avx512__ (6) +#if defined(TARGET_X86_64) + #define __sse2__ (0) + #define __sse42__ (2) + #define __avx2__ (4) + #define __avx512__ (6) +#elif defined(TARGET_ARM) + #define __sve__ (0) +#endif #define __float__ (0) #define __double__ (1) -#define CPU_sse2 __sse2__ -#define CPU_sse42 __sse42__ -#define CPU_avx2 __avx2__ -#define CPU_avx512 __avx512__ +#if defined(TARGET_X86_64) + #define CPU_sse2 __sse2__ + #define CPU_sse42 __sse42__ + #define CPU_avx2 __avx2__ + #define CPU_avx512 __avx512__ +#elif defined(TARGET_ARM) + #define CPU_sve __sve__ +#endif #define FPTYPE_float __float__ #define FPTYPE_double __double__ diff --git a/cpp/daal/src/services/service_topo.h b/cpp/daal/src/services/service_topo.h index 0340265f3ad..483e705af96 100644 --- a/cpp/daal/src/services/service_topo.h +++ b/cpp/daal/src/services/service_topo.h @@ -1,6 +1,7 @@ /* file: service_topo.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. @@ -57,9 +58,12 @@ typedef cpuset_t cpu_set_t; #define __cdecl - #ifdef __x86_64__ + #if defined(TARGET_X86_64) #define LNX_PTR2INT unsigned long long #define LNX_MY1CON 1LL + #elif defined(TARGET_ARM) +using LNX_PTR2INT = uintptr_t; +constexpr LNX_PTR2INT LNX_MY1CON = 1LL; #else #define LNX_PTR2INT unsigned int #define LNX_MY1CON 1 diff --git a/cpp/daal/src/threading/export_lnxarm.ref.def b/cpp/daal/src/threading/export_lnxarm.ref.def new file mode 100644 index 00000000000..58fccd3c71a --- /dev/null +++ b/cpp/daal/src/threading/export_lnxarm.ref.def @@ -0,0 +1,63 @@ +;=============================================================================== +; Copyright contributors to the oneDAL project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+;=============================================================================== + +EXPORTS +openblas_set_num_threads +openblas_get_num_threads +ssyrk_ +dsyrk_ +ssyr_ +dsyr_ +sgemm_ +dgemm_ +ssymm_ +dsymm_ +sgemv_ +dgemv_ +saxpy_ +daxpy_ +sdot_ +ddot_ +sgetrf_ +dgetrf_ +sgetrs_ +dgetrs_ +spotrf_ +dpotrf_ +spotrs_ +dpotrs_ +spotri_ +dpotri_ +sgerqf_ +dgerqf_ +sormrq_ +dormrq_ +strtrs_ +dtrtrs_ +spptrf_ +dpptrf_ +sgeqrf_ +dgeqrf_ +sgeqp3_ +dgeqp3_ +sorgqr_ +dorgqr_ +sgesvd_ +dgesvd_ +ssyevd_ +dsyevd_ +sormqr_ +dormqr_ diff --git a/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp index 4091a492cbc..42b8186a5fe 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,8 @@ * limitations under the License. 
*******************************************************************************/ +#include + #include "daal/src/algorithms/covariance/covariance_kernel.h" #include "oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel.hpp" @@ -23,6 +26,12 @@ #include "oneapi/dal/table/row_accessor.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::covariance::backend { using dal::backend::context_cpu; @@ -64,7 +73,7 @@ static compute_result call_daal_kernel_finalize(const context_cpu& ctx, /// the logic of block size calculation is copied from DAAL, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { const std::int64_t row_count = rows_count_global; if (5000 < row_count && row_count <= 50000) { blockSize = 1024; diff --git a/cpp/oneapi/dal/algo/covariance/backend/cpu/partial_compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/covariance/backend/cpu/partial_compute_kernel_dense.cpp index 2058eeb457a..d7ec3fc3acc 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/cpu/partial_compute_kernel_dense.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/cpu/partial_compute_kernel_dense.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +24,12 @@ #include "oneapi/dal/table/row_accessor.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::covariance::backend { using dal::backend::context_cpu; @@ -53,7 +60,7 @@ static partial_compute_result call_daal_kernel_partial_compute( /// the logic of block size calculation is copied from DAAL, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { const std::int64_t row_count = data.get_row_count(); if (5000 < row_count && row_count <= 50000) { blockSize = 1024; diff --git a/cpp/oneapi/dal/algo/covariance/parameters/cpu/compute_parameters.cpp b/cpp/oneapi/dal/algo/covariance/parameters/cpu/compute_parameters.cpp index 22ee3d8a4b0..6f2b7e59065 100644 --- a/cpp/oneapi/dal/algo/covariance/parameters/cpu/compute_parameters.cpp +++ b/cpp/oneapi/dal/algo/covariance/parameters/cpu/compute_parameters.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +16,7 @@ *******************************************************************************/ #include +#include #include "oneapi/dal/detail/common.hpp" #include "oneapi/dal/detail/profiler.hpp" @@ -27,6 +29,12 @@ #include "oneapi/dal/algo/covariance/parameters/cpu/compute_parameters.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::covariance::parameters { using dal::backend::context_cpu; @@ -46,7 +54,7 @@ std::int64_t propose_block_size(const context_cpu& ctx, const std::int64_t row_c /// The constants are defined as the values that show the best performance results /// in the series of performance measurements with the varying block sizes and dataset sizes. std::int64_t block_size = 140l; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { /// Here if AVX512 extensions are available on CPU if (5000l < row_count && row_count <= 50000l) { block_size = 1024l; diff --git a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp index 56ea3021929..b2269d4cdd9 100644 --- a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp +++ b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -107,10 +108,11 @@ vertex_similarity_result jaccard( return res; } +#if defined(TARGET_X86_64) template <> vertex_similarity_result jaccard( const detail::descriptor_base &desc, const dal::preview::detail::topology &t, void *result_ptr); - +#endif } // namespace oneapi::dal::preview::jaccard::backend diff --git a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_avx512.hpp b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_avx512.hpp index ca350bec92a..7e5a2dda834 100644 --- a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_avx512.hpp +++ b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_avx512.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +17,6 @@ #pragma once -#include - #include #include "oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp" diff --git a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_cpu.cpp b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_cpu.cpp index f9c81a76cbc..8bb76b2ddbe 100644 --- a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_cpu.cpp +++ b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_cpu.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_skx.cpp b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_skx.cpp index 15c5e3976cc..eef60006c5f 100644 --- a/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_skx.cpp +++ b/cpp/oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_skx.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +15,9 @@ * limitations under the License. *******************************************************************************/ +#if defined(TARGET_X86_64) #include +#endif #include "oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel.hpp" #include "oneapi/dal/algo/jaccard/backend/cpu/vertex_similarity_default_kernel_avx512.hpp" @@ -27,6 +30,7 @@ namespace oneapi::dal::preview::jaccard::backend { +#if defined(TARGET_X86_64) template vertex_similarity_result jaccard_avx512< dal::backend::cpu_dispatch_avx512>(const detail::descriptor_base& desc, const dal::preview::detail::topology& t, @@ -39,5 +43,6 @@ vertex_similarity_result jaccard(desc, t, result_ptr); } +#endif } // namespace oneapi::dal::preview::jaccard::backend diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp index e59c44d53b1..ffe447ec5d2 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache 
License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include + #include #include #include "daal/src/algorithms/covariance/covariance_kernel.h" @@ -26,6 +29,12 @@ #include "oneapi/dal/backend/interop/table_conversion.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::pca::backend { using dal::backend::context_cpu; @@ -84,7 +93,7 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx /// the logic of block size calculation is copied from DAAL, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { if (5000 < row_count && row_count <= 50000) { blockSize = 1024; } diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp index 8400fd918fc..dff5d1a3a2a 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,9 @@ * limitations under the License. 
*******************************************************************************/ +#include + +#include #include #include "daal/src/algorithms/covariance/covariance_kernel.h" @@ -25,6 +29,12 @@ #include "oneapi/dal/backend/interop/table_conversion.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::pca::backend { using dal::backend::context_cpu; @@ -66,7 +76,7 @@ static partial_train_result call_daal_kernel_partial_train( /// the logic of block size calculation is copied from DAAL, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { const std::int64_t row_count = data.get_row_count(); if (5000 < row_count && row_count <= 50000) { blockSize = 1024; diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp index ef12d49fe74..63b364f7e6b 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,8 @@ * limitations under the License. 
*******************************************************************************/ +#include + #include #include @@ -25,6 +28,12 @@ #include "oneapi/dal/backend/interop/table_conversion.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#if defined(TARGET_X86_64) +#define CPU_EXTENSION dal::detail::cpu_extension::avx512 +#elif defined(TARGET_ARM) +#define CPU_EXTENSION dal::detail::cpu_extension::sve +#endif + namespace oneapi::dal::pca::backend { using dal::backend::context_cpu; @@ -83,7 +92,8 @@ static result_t call_daal_kernel(const context_cpu& ctx, /// the logic of block size calculation is copied from DAAL, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; - if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { + if (ctx.get_enabled_cpu_extensions() == CPU_EXTENSION) { + const std::int64_t row_count = data.get_row_count(); if (5000 < row_count && row_count <= 50000) { blockSize = 1024; } diff --git a/cpp/oneapi/dal/algo/subgraph_isomorphism/backend/cpu/compiler_adapt.hpp b/cpp/oneapi/dal/algo/subgraph_isomorphism/backend/cpu/compiler_adapt.hpp index 181dc2f31bc..f98462bc963 100644 --- a/cpp/oneapi/dal/algo/subgraph_isomorphism/backend/cpu/compiler_adapt.hpp +++ b/cpp/oneapi/dal/algo/subgraph_isomorphism/backend/cpu/compiler_adapt.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2021 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +17,13 @@ #pragma once #include + +#include + +#if defined(TARGET_X86_64) #include +#endif + #include #include "oneapi/dal/backend/dispatcher.hpp" @@ -83,6 +90,7 @@ ONEDAL_FORCEINLINE std::int32_t ONEDAL_popcnt64(std::uint64_t a) { #endif } +#if defined(TARGET_X86_64) template <> ONEDAL_FORCEINLINE std::int32_t ONEDAL_lzcnt_u32(std::uint32_t a) { if (a == 0) @@ -164,5 +172,20 @@ ONEDAL_FORCEINLINE std::int32_t ONEDAL_popcnt64 } return bit_cnt; } +#elif defined(TARGET_ARM) +template <> +ONEDAL_FORCEINLINE std::int32_t ONEDAL_lzcnt_u32(std::uint32_t a) { + return __builtin_clz(a); +} +template <> +ONEDAL_FORCEINLINE std::int32_t ONEDAL_lzcnt_u64(std::uint64_t a) { + return __builtin_clzl(a); +} + +template <> +ONEDAL_FORCEINLINE std::int32_t ONEDAL_popcnt64(std::uint64_t a) { + return __builtin_popcountl(a); +} +#endif } // namespace oneapi::dal::preview::subgraph_isomorphism::backend diff --git a/cpp/oneapi/dal/algo/triangle_counting/backend/cpu/intersection_tc.hpp b/cpp/oneapi/dal/algo/triangle_counting/backend/cpu/intersection_tc.hpp index cfbdc57231d..935e7057332 100644 --- a/cpp/oneapi/dal/algo/triangle_counting/backend/cpu/intersection_tc.hpp +++ b/cpp/oneapi/dal/algo/triangle_counting/backend/cpu/intersection_tc.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +17,9 @@ #pragma once +#if defined(TARGET_X86_64) #include +#endif #include @@ -68,6 +71,7 @@ ONEDAL_FORCEINLINE std::int32_t _popcnt32_redef(const std::int32_t& x) { {} #endif +#if defined(TARGET_X86_64) template <> struct intersection_local_tc { ONEDAL_FORCEINLINE std::int64_t operator()(const std::int32_t* neigh_u, @@ -419,5 +423,6 @@ struct intersection_local_tc { return total; } }; +#endif } // namespace oneapi::dal::preview::triangle_counting::backend diff --git a/cpp/oneapi/dal/backend/dispatcher.cpp b/cpp/oneapi/dal/backend/dispatcher.cpp index 1a5e39e95dd..69974fabdbb 100644 --- a/cpp/oneapi/dal/backend/dispatcher.cpp +++ b/cpp/oneapi/dal/backend/dispatcher.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,17 +38,25 @@ void context_cpu::global_init() { inline constexpr detail::cpu_extension from_daal_cpu_type(daal::CpuType cpu) { using detail::cpu_extension; switch (cpu) { +#if defined(TARGET_X86_64) case daal::sse2: return cpu_extension::sse2; case daal::sse42: return cpu_extension::sse42; case daal::avx2: return cpu_extension::avx2; case daal::avx512: return cpu_extension::avx512; +#elif defined(TARGET_ARM) + case daal::sve: return cpu_extension::sve; +#endif } return cpu_extension::none; } detail::cpu_extension detect_top_cpu_extension() { if (!__daal_serv_cpu_extensions_available()) { +#if defined(TARGET_X86_64) return detail::cpu_extension::sse2; +#elif defined(TARGET_ARM) + return detail::cpu_extension::sve; +#endif } const auto daal_cpu = (daal::CpuType)__daal_serv_cpu_detect(0); return from_daal_cpu_type(daal_cpu); diff --git a/cpp/oneapi/dal/backend/dispatcher.hpp b/cpp/oneapi/dal/backend/dispatcher.hpp index 5325cb2efab..7737f214ebf 100644 --- a/cpp/oneapi/dal/backend/dispatcher.hpp +++ b/cpp/oneapi/dal/backend/dispatcher.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +17,8 @@ #pragma once +#include "daal/include/services/daal_defines.h" + #include "oneapi/dal/detail/policy.hpp" #include "oneapi/dal/detail/spmd_policy.hpp" @@ -38,11 +41,16 @@ namespace oneapi::dal::backend { detail::cpu_extension detect_top_cpu_extension(); +#if defined(TARGET_X86_64) struct cpu_dispatch_sse2 {}; struct cpu_dispatch_sse42 {}; struct cpu_dispatch_avx2 {}; struct cpu_dispatch_avx512 {}; +#elif defined(TARGET_ARM) +struct cpu_dispatch_sve {}; +#endif +#if defined(TARGET_X86_64) using cpu_dispatch_default = cpu_dispatch_sse2; #define __CPU_TAG_SSE2__ oneapi::dal::backend::cpu_dispatch_sse2 @@ -51,6 +59,13 @@ using cpu_dispatch_default = cpu_dispatch_sse2; #define __CPU_TAG_AVX512__ oneapi::dal::backend::cpu_dispatch_avx512 #define __CPU_TAG_DEFAULT__ oneapi::dal::backend::cpu_dispatch_default +#elif defined(TARGET_ARM) +using cpu_dispatch_default = cpu_dispatch_sve; + +#define __CPU_TAG_ARMV8SVE__ oneapi::dal::backend::cpu_dispatch_sve + +#endif + template class communicator_provider : public base { public: @@ -279,6 +294,8 @@ inline constexpr auto dispatch_by_cpu(const context_cpu& ctx, Op&& op) { using detail::cpu_extension; [[maybe_unused]] const cpu_extension cpu_ex = ctx.get_enabled_cpu_extensions(); + +#if defined(TARGET_X86_64) ONEDAL_IF_CPU_DISPATCH_AVX512(if (test_cpu_extension(cpu_ex, cpu_extension::avx512)) { return op(cpu_dispatch_avx512{}); }) @@ -286,6 +303,12 @@ inline constexpr auto dispatch_by_cpu(const context_cpu& ctx, Op&& op) { if (test_cpu_extension(cpu_ex, cpu_extension::avx2)) { return op(cpu_dispatch_avx2{}); }) ONEDAL_IF_CPU_DISPATCH_SSE42( if (test_cpu_extension(cpu_ex, cpu_extension::sse42)) { return op(cpu_dispatch_sse42{}); }) + +#elif defined(TARGET_ARM) + ONEDAL_IF_CPU_DISPATCH_A8SVE( + if (test_cpu_extension(cpu_ex, cpu_extension::sve)) { return op(cpu_dispatch_sve{}); }) +#endif + return op(cpu_dispatch_default{}); } diff --git a/cpp/oneapi/dal/backend/dispatcher_cpu.hpp 
b/cpp/oneapi/dal/backend/dispatcher_cpu.hpp index ef93e796f4b..ca7c92e0d38 100644 --- a/cpp/oneapi/dal/backend/dispatcher_cpu.hpp +++ b/cpp/oneapi/dal/backend/dispatcher_cpu.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,14 +17,21 @@ #pragma once +#include + #ifdef __ONEDAL_IDE_MODE__ // If this file is openned in IDE it will complain about // `_onedal_dispatcher_cpu.hpp` as this file is generated at build time. // It's recommended to define __ONEDAL_IDE_MODE__ in your IDE settings to // enable this branch for preprocessor. + +#if defined(TARGET_X86_64) #define ONEDAL_CPU_DISPATCH_SSE42 #define ONEDAL_CPU_DISPATCH_AVX2 #define ONEDAL_CPU_DISPATCH_AVX512 +#elif defined(TARGET_ARM) +#define ONEDAL_CPU_DISPATCH_A8SVE +#endif #else // This file is automatically generated by build system #include "oneapi/dal/_dal_cpu_dispatcher_gen.hpp" @@ -46,3 +54,9 @@ #else #define ONEDAL_IF_CPU_DISPATCH_AVX512(x) #endif + +#ifdef ONEDAL_CPU_DISPATCH_A8SVE +#define ONEDAL_IF_CPU_DISPATCH_A8SVE(x) x +#else +#define ONEDAL_IF_CPU_DISPATCH_A8SVE(x) +#endif diff --git a/cpp/oneapi/dal/backend/interop/common.hpp b/cpp/oneapi/dal/backend/interop/common.hpp index c9d6652396c..82a2d77700b 100644 --- a/cpp/oneapi/dal/backend/interop/common.hpp +++ b/cpp/oneapi/dal/backend/interop/common.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,6 +31,7 @@ struct daal_cpu_value { constexpr static daal::CpuType value = cpu; }; +#if defined(TARGET_X86_64) template <> struct to_daal_cpu_type : daal_cpu_value {}; template <> @@ -39,6 +41,12 @@ struct to_daal_cpu_type : daal_cpu_value {}; template <> struct to_daal_cpu_type : daal_cpu_value {}; +#elif defined(TARGET_ARM) +template <> +struct to_daal_cpu_type : daal_cpu_value {}; + +#endif + template typename CpuKernel, typename... Args> inline auto call_daal_kernel(const context_cpu& ctx, Args&&... args) { return dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { diff --git a/cpp/oneapi/dal/backend/micromkl/macro.hpp b/cpp/oneapi/dal/backend/micromkl/macro.hpp index 3cd555e78cb..b46910ce6e5 100644 --- a/cpp/oneapi/dal/backend/micromkl/macro.hpp +++ b/cpp/oneapi/dal/backend/micromkl/macro.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2021 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +17,8 @@ #pragma once +#include + #ifndef __MICROMKL_INCLUDE_GUARD__ #error "This header cannot be included outside of micromkl module" #endif @@ -50,8 +53,12 @@ FUNC_CPU_DECL(nominal_cpu, prefix, name, argdecl) \ DISPATCH_FUNC_CPU(nominal_cpu, actual_cpu, prefix, name, argdecl, argcall) +#if defined(TARGET_X86_64) #define FUNC_AVX512(...) EXPAND(FUNC_CPU(avx512, avx512, __VA_ARGS__)) #define FUNC_AVX2(...) EXPAND(FUNC_CPU(avx2, avx2, __VA_ARGS__)) +#elif defined(TARGET_ARM) +#define FUNC_A8SVE(...) EXPAND(FUNC_CPU(sve, sve, __VA_ARGS__)) +#endif #ifdef __APPLE__ #define FUNC_SSE42(...) EXPAND(FUNC_CPU(sse42, avx2, __VA_ARGS__)) @@ -61,12 +68,18 @@ #define FUNC_SSE2(...) 
EXPAND(FUNC_CPU(sse2, sse2, __VA_ARGS__)) #endif +#if defined(TARGET_X86_64) #define FUNC(prefix, name, argdecl, argcall) \ DISPATCH_FUNC_DECL(prefix, name, argdecl) \ FUNC_AVX512(prefix, name, argdecl, argcall) \ FUNC_AVX2(prefix, name, argdecl, argcall) \ FUNC_SSE42(prefix, name, argdecl, argcall) \ FUNC_SSE2(prefix, name, argdecl, argcall) +#elif defined(TARGET_ARM) +#define FUNC(prefix, name, argdecl, argcall) \ + DISPATCH_FUNC_DECL(prefix, name, argdecl) \ + FUNC_A8SVE(prefix, name, argdecl, argcall) +#endif #ifdef ONEDAL_REF #define FUNC_DECL(prefix, floatabr, name, argdecl, argcall) \ @@ -83,6 +96,12 @@ #define INSTANTIATE_CPU(cpu, name, Float, argdecl) \ template void name argdecl(Float); +#ifdef ONEDAL_CPU_DISPATCH_A8SVE +#define INSTANTIATE_A8SVE(...) EXPAND(INSTANTIATE_CPU(sve, __VA_ARGS__)) +#else +#define INSTANTIATE_A8SVE(...) +#endif + #ifdef ONEDAL_CPU_DISPATCH_AVX512 #define INSTANTIATE_AVX512(...) EXPAND(INSTANTIATE_CPU(avx512, __VA_ARGS__)) #else @@ -103,11 +122,15 @@ #define INSTANTIATE_SSE2(...) 
EXPAND(INSTANTIATE_CPU(sse2, __VA_ARGS__)) +#if defined(TARGET_X86_64) #define INSTANTIATE_FLOAT(name, Float, argdecl) \ INSTANTIATE_AVX512(name, Float, argdecl) \ INSTANTIATE_AVX2(name, Float, argdecl) \ INSTANTIATE_SSE42(name, Float, argdecl) \ INSTANTIATE_SSE2(name, Float, argdecl) +#elif defined(TARGET_ARM) +#define INSTANTIATE_FLOAT(name, Float, argdecl) INSTANTIATE_A8SVE(name, Float, argdecl) +#endif #define FUNC_TEMPLATE(prefix, name, fargdecl, cargdecl, fargcall, cargcall) \ FUNC_DECL(prefix, s, name, fargdecl(float), fargcall) \ diff --git a/cpp/oneapi/dal/backend/primitives/intersection/intersection.hpp b/cpp/oneapi/dal/backend/primitives/intersection/intersection.hpp index b8dcc175838..341a0b7637e 100644 --- a/cpp/oneapi/dal/backend/primitives/intersection/intersection.hpp +++ b/cpp/oneapi/dal/backend/primitives/intersection/intersection.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +17,9 @@ #pragma once +#if defined(TARGET_X86_64) #include +#endif #include @@ -62,6 +65,7 @@ ONEDAL_FORCEINLINE std::int32_t _popcnt32_redef(const std::int32_t &x) { {} #endif +#if defined(TARGET_X86_64) template <> ONEDAL_FORCEINLINE std::int64_t intersection( const std::int32_t *neigh_u, @@ -569,5 +573,6 @@ ONEDAL_FORCEINLINE std::int64_t intersection( } return total; } +#endif } // namespace oneapi::dal::preview::backend diff --git a/cpp/oneapi/dal/detail/dispatcher.hpp b/cpp/oneapi/dal/detail/dispatcher.hpp index 522287c3663..15d94d098d3 100644 --- a/cpp/oneapi/dal/detail/dispatcher.hpp +++ b/cpp/oneapi/dal/detail/dispatcher.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,22 +17,33 @@ #pragma once +#include + namespace oneapi::dal::detail { namespace v1 { +#if defined(TARGET_X86_64) struct cpu_dispatch_sse2 {}; struct cpu_dispatch_sse42 {}; struct cpu_dispatch_avx2 {}; struct cpu_dispatch_avx512 {}; - using cpu_dispatch_default = cpu_dispatch_sse2; +#elif defined(TARGET_ARM) +struct cpu_dispatch_sve {}; +using cpu_dispatch_default = cpu_dispatch_sve; +#endif } // namespace v1 +#if defined(TARGET_X86_64) using v1::cpu_dispatch_sse2; using v1::cpu_dispatch_sse42; using v1::cpu_dispatch_avx2; using v1::cpu_dispatch_avx512; +#elif defined(TARGET_ARM) +using v1::cpu_dispatch_sve; +#endif + using v1::cpu_dispatch_default; } // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/detail/policy.hpp b/cpp/oneapi/dal/detail/policy.hpp index c62c31e9c30..127c9770d6c 100644 --- a/cpp/oneapi/dal/detail/policy.hpp +++ b/cpp/oneapi/dal/detail/policy.hpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2020 Intel Corporation +* 
Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +17,15 @@ #pragma once +// TODO: Clean up this redefinition and import the defines globally. +#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(_M_AMD64) +#define TARGET_X86_64 +#endif + +#if defined(__ARM_ARCH) || defined(__aarch64__) +#define TARGET_ARM +#endif + #include #ifdef ONEDAL_DATA_PARALLEL #include @@ -61,10 +71,14 @@ inline constexpr bool is_data_parallel_policy_v = is_data_parallel_policy::va enum class cpu_extension : uint64_t { none = 0U, +#if defined(TARGET_X86_64) sse2 = 1U << 0, sse42 = 1U << 2, avx2 = 1U << 4, avx512 = 1U << 5 +#elif defined(TARGET_ARM) + sve = 1U << 0, +#endif }; class ONEDAL_EXPORT default_host_policy {}; diff --git a/deploy/local/dal b/deploy/local/dal index 20ea287f505..6ddff7e537b 100644 --- a/deploy/local/dal +++ b/deploy/local/dal @@ -1,6 +1,7 @@ #%Module1.0################################################################### #=============================================================================== # Copyright 2020 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -60,7 +61,14 @@ proc ModulesHelp { } { # Set intermediate variables set dalroot "$componentroot" -set daal_target_arch "intel64" +set daalroot "$componentroot/$modulefilever" +if {[string equal [info machine] "aarch64"]} { + set daal_target_arch "arm" +} else { + set daal_target_arch "intel64" +} + +module-whatis "oneAPI Data Analytics Library for $daal_target_arch." 
# Setup environment variables setenv DAL_MAJOR_BINARY 1 diff --git a/deploy/local/vars_lnx.sh b/deploy/local/vars_lnx.sh index fc0172ff9bc..a39ece22af5 100644 --- a/deploy/local/vars_lnx.sh +++ b/deploy/local/vars_lnx.sh @@ -4,6 +4,7 @@ #=============================================================================== # Copyright 2014 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -223,6 +224,17 @@ if [ ! -d $__daal_tmp_dir ]; then __daal_tmp_dir=${component_root} fi +ARCH_ONEDAL=$(uname -m) + +if [[ "${ARCH_ONEDAL}" == "x86_64" ]]; then + ARCH_DIR_ONEDAL="intel64" +elif [[ "${ARCH_ONEDAL}" == "aarch64" ]]; then + ARCH_DIR_ONEDAL="arm" +else + echo "Unsupported CPU architecture '${ARCH_ONEDAL}'" + exit 1 +fi + if [ "$(basename "${my_script_path}")" = "env" ] ; then # assume stand-alone # case "${my_script_path}" in # *"env"*) @@ -239,8 +251,8 @@ if [ "$(basename "${my_script_path}")" = "env" ] ; then # assume stand-alone export LD_LIBRARY_PATH="$__daal_tmp_dir/lib${LD_LIBRARY_PATH+:${LD_LIBRARY_PATH}}" else export CPATH="$__daal_tmp_dir/include${CPATH+:${CPATH}}" - export LIBRARY_PATH="$__daal_tmp_dir/lib/intel64${LIBRARY_PATH+:${LIBRARY_PATH}}" - export LD_LIBRARY_PATH="$__daal_tmp_dir/lib/intel64${LD_LIBRARY_PATH+:${LD_LIBRARY_PATH}}" + export LIBRARY_PATH="$__daal_tmp_dir/lib/$ARCH_DIR_ONEDAL${LIBRARY_PATH+:${LIBRARY_PATH}}" + export LD_LIBRARY_PATH="$__daal_tmp_dir/lib/$ARCH_DIR_ONEDAL${LD_LIBRARY_PATH+:${LD_LIBRARY_PATH}}" fi # ;; else # must be a consolidated layout diff --git a/deploy/nuget/prepare_dal_nuget.sh b/deploy/nuget/prepare_dal_nuget.sh index bf05f73978c..5d115c71e9b 100755 --- a/deploy/nuget/prepare_dal_nuget.sh +++ b/deploy/nuget/prepare_dal_nuget.sh @@ -1,6 +1,7 @@ #!/bin/bash #=============================================================================== # Copyright 2022 Intel Corporation +# 
Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -53,12 +54,20 @@ create_package() { # platform specific platform=$(bash $(dirname "$0")/../../dev/make/identify_os.sh) if [ ${platform} = "lnx32e" ]; then - platform=linux-x64 + platform=linux tbb_platform=linux rls_prefix=${rls_dir}/daal/latest dynamic_lib_path=lib/intel64 static_lib_path=lib/intel64 lib_prefix=libonedal + elif [ ${platform} = "lnxarm" ]; then + platform=linux + tbb_platform=linux + rls_prefix=${rls_dir}/daal/latest + dynamic_lib_path=lib/arm + static_lib_path=lib/arm + lib_prefix=libonedal + elif [ ${platform} = "mac32e" ]; then platform=osx-x64 tbb_platform=osx @@ -98,7 +107,7 @@ create_package() { if [ "${build_nupkg}" = "yes" ]; then # extension of libraries - if [ "${platform}" = "linux-x64" ]; then + if [ "${platform}" = "linux" ]; then dl_postfix=.so.${major_binary_version}.${minor_binary_version} sl_postfix=.a elif [ "${platform}" = "osx-x64" ]; then diff --git a/deploy/pkg-config/generate_pkgconfig.py b/deploy/pkg-config/generate_pkgconfig.py index b287f853bc4..323ff7e5138 100755 --- a/deploy/pkg-config/generate_pkgconfig.py +++ b/deploy/pkg-config/generate_pkgconfig.py @@ -1,5 +1,7 @@ +'''generate_pkgconfig.py''' #=============================================================================== # Copyright 2021 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,6 +21,21 @@ import glob import argparse from sys import platform +import platform as plt + +def detect_cpu_architecture(): + """ + Detect CPU architecture + """ + architecture = plt.machine() + if architecture in ('x86_64', 'AMD64'): + return 'x86_64' + elif architecture.startswith('arm') or architecture == 'aarch64': + return 'aarch64' + else: + sys.stderr.write(f"Unknown Architecture {architecture} Detected. " \ + "Only 'x86_64', 'AMD64' and 'aarch64' supported.\n") + sys.exit(1) LIBS_PAR_STAT, LIBS_PAR_DYN = [], [] @@ -45,9 +62,18 @@ }, } +ARCH = detect_cpu_architecture() + if platform in ["linux2", "linux"]: PREF_LIB = "lib" - LIBDIR = 'lib/intel64' + + if ARCH == 'x86_64': + LIBDIR = 'lib/intel64' + elif ARCH == 'aarch64': + LIBDIR = 'lib/arm' + else: + sys.stderr.write(f"Unknown CPU architecture '{ARCH}'\n") + SUFF_DYN_LIB = ".so" SUFF_STAT_LIB = ".a" TBB_LIBS = "-ltbb -ltbbmalloc" diff --git a/dev/bazel/config/cpudetect.cpp b/dev/bazel/config/cpudetect.cpp index a6abc42eb01..eee6a3d3752 100644 --- a/dev/bazel/config/cpudetect.cpp +++ b/dev/bazel/config/cpudetect.cpp @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +15,17 @@ * limitations under the License. 
*******************************************************************************/ +#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(_M_AMD64) + #define TARGET_X86_64 +#endif + +#if defined(__ARM_ARCH) || defined(__aarch64__) + #define TARGET_ARM +#endif + +#if defined(TARGET_X86_64) #include +#endif #if defined(_MSC_VER) #if (_MSC_FULL_VER >= 160040219) @@ -154,20 +165,25 @@ int check_sse42_features() { } std::string detect_cpu() { - try_enable_avx512f_on_macos(); - if (check_avx512_features()) { - return "avx512"; - } - else if (check_avx2_features()) { - return "avx2"; - } - else if (check_sse42_features()) { - return "sse42"; - } - else { - return "sse2"; - } + #if defined(TARGET_X86_64) + try_enable_avx512f_on_macos(); + + if (check_avx512_features()) { + return "avx512"; + } + else if (check_avx2_features()) { + return "avx2"; + } + else if (check_sse42_features()) { + return "sse42"; + } + else { + return "sse2"; + } + #elif defined(TARGET_ARM) + return "sve"; + #endif } int main(int argc, char const *argv[]) { diff --git a/dev/download_tbb.sh b/dev/download_tbb.sh index fe05c687795..5ba94225c95 100755 --- a/dev/download_tbb.sh +++ b/dev/download_tbb.sh @@ -1,6 +1,7 @@ #!/bin/bash #=============================================================================== # Copyright 2014 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/dev/make/cmplr.clang.mkl.mk b/dev/make/cmplr.clang.mkl.32e.mk similarity index 97% rename from dev/make/cmplr.clang.mkl.mk rename to dev/make/cmplr.clang.mkl.32e.mk index 52d77cf757f..65ac0aca76b 100644 --- a/dev/make/cmplr.clang.mkl.mk +++ b/dev/make/cmplr.clang.mkl.32e.mk @@ -16,7 +16,7 @@ #=============================================================================== #++ -# Clang defenitions for makefile +# Clang definitions for makefile #-- PLATs.clang = lnx32e mac32e diff --git a/dev/make/cmplr.clang.ref.mk b/dev/make/cmplr.clang.ref.32e.mk similarity index 98% rename from dev/make/cmplr.clang.ref.mk rename to dev/make/cmplr.clang.ref.32e.mk index b7d12348253..648c1042f7c 100644 --- a/dev/make/cmplr.clang.ref.mk +++ b/dev/make/cmplr.clang.ref.32e.mk @@ -16,7 +16,7 @@ #=============================================================================== #++ -# Clang defenitions for makefile +# Clang definitions for makefile #-- PLATs.clang = lnx32e mac32e diff --git a/dev/make/cmplr.clang.ref.arm.mk b/dev/make/cmplr.clang.ref.arm.mk new file mode 100644 index 00000000000..50996dc4a06 --- /dev/null +++ b/dev/make/cmplr.clang.ref.arm.mk @@ -0,0 +1,45 @@ +# file: cmplr.clang.ref.arm.mk +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== + +#++ +# Clang definitions for makefile +#-- + +PLATs.clang = lnxarm + +CMPLRDIRSUFF.clang = _clang + +CORE.SERV.COMPILER.clang = generic + +-Zl.clang = +-DEBC.clang = -g + +COMPILER.lnx.clang= clang++ -march=armv8-a+sve \ + -DDAAL_REF -DONEDAL_REF -DDAAL_CPU=sve -Werror -Wreturn-type +# Linker flags +link.dynamic.lnx.clang = clang++ -march=armv8-a+sve + +pedantic.opts.clang = -pedantic \ + -Wall \ + -Wextra \ + -Wno-unused-parameter + +pedantic.opts.mac.clang = $(pedantic.opts.clang) +pedantic.opts.lnx.clang = $(pedantic.opts.clang) + +# For SVE +a8sve_OPT.clang = $(-Q)march=armv8-a+sve diff --git a/dev/make/cmplr.dpcpp.mk b/dev/make/cmplr.dpcpp.mk index 2a78043b729..848f36c2db1 100644 --- a/dev/make/cmplr.dpcpp.mk +++ b/dev/make/cmplr.dpcpp.mk @@ -16,7 +16,7 @@ #=============================================================================== #++ -# DPC++ Compiler defenitions for makefile +# DPC++ Compiler definitions for makefile #-- PLATs.dpcpp = lnx32e win32e diff --git a/dev/make/cmplr.gnu.mkl.mk b/dev/make/cmplr.gnu.mkl.32e.mk similarity index 98% rename from dev/make/cmplr.gnu.mkl.mk rename to dev/make/cmplr.gnu.mkl.32e.mk index 2e5008a519a..481fda8d19e 100644 --- a/dev/make/cmplr.gnu.mkl.mk +++ b/dev/make/cmplr.gnu.mkl.32e.mk @@ -15,7 +15,7 @@ #=============================================================================== #++ -# g++ defenitions for makefile +# g++ definitions for makefile #-- PLATs.gnu = lnx32e mac32e diff --git a/dev/make/cmplr.gnu.ref.mk b/dev/make/cmplr.gnu.ref.32e.mk similarity index 98% rename from dev/make/cmplr.gnu.ref.mk rename to dev/make/cmplr.gnu.ref.32e.mk index fac0235da8d..27f6d817605 100644 --- a/dev/make/cmplr.gnu.ref.mk +++ b/dev/make/cmplr.gnu.ref.32e.mk @@ -15,7 +15,7 @@ #=============================================================================== #++ -# g++ defenitions for makefile +# g++ definitions for makefile #-- PLATs.gnu = lnx32e 
mac32e diff --git a/dev/make/cmplr.gnu.ref.arm.mk b/dev/make/cmplr.gnu.ref.arm.mk new file mode 100644 index 00000000000..b8567ee7b4d --- /dev/null +++ b/dev/make/cmplr.gnu.ref.arm.mk @@ -0,0 +1,48 @@ +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +#++ +# g++ definitions for makefile +#-- + +PLATs.gnu = lnxarm + +CMPLRDIRSUFF.gnu = _gnu + +CORE.SERV.COMPILER.gnu = generic + +-Zl.gnu = +-DEBC.gnu = -g + +COMPILER.all.gnu = ${CXX} -march=armv8-a+sve -fwrapv -fno-strict-overflow -fno-delete-null-pointer-checks \ + -DDAAL_REF -DONEDAL_REF -DDAAL_CPU=sve -Werror -Wreturn-type + +link.dynamic.all.gnu = ${CXX} -march=armv8-a+sve + +pedantic.opts.all.gnu = -pedantic \ + -Wall \ + -Wextra \ + -Wno-unused-parameter + +COMPILER.lnx.gnu = $(COMPILER.all.gnu) +link.dynamic.lnx.gnu = $(link.dynamic.all.gnu) +pedantic.opts.lnx.gnu = $(pedantic.opts.all.gnu) + +COMPILER.mac.gnu = $(COMPILER.all.gnu) +link.dynamic.mac.gnu = $(link.dynamic.all.gnu) +pedantic.opts.mac.gnu = $(pedantic.opts.all.gnu) + +a8sve_OPT.gnu = $(-Q)march=armv8-a+sve diff --git a/dev/make/cmplr.icc.mkl.mk b/dev/make/cmplr.icc.mkl.32e.mk similarity index 97% rename from dev/make/cmplr.icc.mkl.mk rename to dev/make/cmplr.icc.mkl.32e.mk index a3ccb0750f3..a6ff2410ecc 100644 --- a/dev/make/cmplr.icc.mkl.mk +++
b/dev/make/cmplr.icc.mkl.32e.mk @@ -15,7 +15,7 @@ #=============================================================================== #++ -# Intel compiler defenitions for makefile +# Intel compiler definitions for makefile #-- PLATs.icc = lnx32e win32e mac32e diff --git a/dev/make/cmplr.icx.mkl.mk b/dev/make/cmplr.icx.mkl.32e.mk similarity index 96% rename from dev/make/cmplr.icx.mkl.mk rename to dev/make/cmplr.icx.mkl.32e.mk index cbcde1a7e09..b22bcfe22ac 100644 --- a/dev/make/cmplr.icx.mkl.mk +++ b/dev/make/cmplr.icx.mkl.32e.mk @@ -15,7 +15,7 @@ #=============================================================================== #++ -# Intel compiler defenitions for makefile +# Intel compiler definitions for makefile #-- PLATs.icx = lnx32e mac32e diff --git a/dev/make/cmplr.vc.mkl.mk b/dev/make/cmplr.vc.mkl.32e.mk similarity index 100% rename from dev/make/cmplr.vc.mkl.mk rename to dev/make/cmplr.vc.mkl.32e.mk diff --git a/dev/make/identify_os.sh b/dev/make/identify_os.sh index 12816582bbc..88843b91f82 100755 --- a/dev/make/identify_os.sh +++ b/dev/make/identify_os.sh @@ -16,8 +16,16 @@ #=============================================================================== os=$(uname) +ARCH=$(uname -m) if [ "${os}" = "Linux" ]; then - echo lnx32e + if [ "${ARCH}" = "x86_64" ]; then + echo lnx32e + elif [ "${ARCH}" = "aarch64" ]; then + echo lnxarm + else + echo "Unknown architecture: ${ARCH}" >&2 + exit 1 + fi elif [ "${os}" = "Darwin" ]; then echo mac32e elif [[ "${os}" =~ "MSYS" || "${os}" =~ "CYGWIN" ]]; then diff --git a/examples/cmake/setup_examples.cmake b/examples/cmake/setup_examples.cmake index 78e37159f17..a705b7e10f6 100644 --- a/examples/cmake/setup_examples.cmake +++ b/examples/cmake/setup_examples.cmake @@ -1,5 +1,6 @@ #=============================================================================== # Copyright 2023 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use
this file except in compliance with the License. @@ -101,6 +102,16 @@ endfunction() function (add_examples examples_paths) foreach(example_file_path ${examples_paths}) get_filename_component(example ${example_file_path} NAME_WE) + + # Detect CPU architecture + if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") + set(CPU_ARCHITECTURE "intel_intel64") + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(CPU_ARCHITECTURE "arm_aarch64") + else() + message(FATAL_ERROR "Unknown architecture ${CMAKE_HOST_SYSTEM_PROCESSOR}") + endif() + add_executable(${example} ${example_file_path}) target_include_directories(${example} PRIVATE ${oneDAL_INCLUDE_DIRS}) if (UNIX AND NOT APPLE) @@ -110,7 +121,7 @@ function (add_examples examples_paths) endif() target_compile_options(${example} PRIVATE ${ONEDAL_CUSTOM_COMPILE_OPTIONS}) target_link_options(${example} PRIVATE ${ONEDAL_CUSTOM_LINK_OPTIONS}) - set_target_properties(${example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/_cmake_results/intel_intel64_${LINK_TYPE}") + set_target_properties(${example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/_cmake_results/${CPU_ARCHITECTURE}_${LINK_TYPE}") endforeach() set_common_compiler_options() endfunction() diff --git a/makefile b/makefile index f4ecdffa268..e5abcb9846e 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,6 @@ #=============================================================================== # Copyright 2014 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -27,6 +28,7 @@ ifeq (help,$(MAKECMDGOALS)) endif attr.lnx32e = lnx intel64 lin +attr.lnxarm = lnx arm lin attr.mac32e = mac intel64 attr.win32e = win intel64 win @@ -53,19 +55,53 @@ $(error Building with the parameters library is not available on Windows OS) endif endif -COMPILERs = icc icx gnu clang vc -COMPILER ?= icc +ifeq ($(PLAT),lnx32e) + BACKEND_CONFIG ?= mkl + ARCH = 32e +else ifeq ($(PLAT),mac32e) + BACKEND_CONFIG ?= mkl + ARCH = 32e +else ifeq ($(PLAT),win32e) + BACKEND_CONFIG ?= mkl + ARCH = 32e +else ifeq ($(PLAT),lnxarm) + BACKEND_CONFIG ?= ref + ARCH = arm +else + $(error Unsupported platform: $(PLAT)) +endif -BACKEND_CONFIG ?= mkl +ARCH_is_$(ARCH) := yes + +ifeq ($(ARCH_is_32e),yes) + $(if $(filter mkl ref,$(BACKEND_CONFIG)),,$(error Unsupported backend config '$(BACKEND_CONFIG)'. \ + Supported config for '${PLAT}' are ['mkl','ref'])) + COMPILERs = icc icx gnu clang vc + COMPILER ?= icc +else + $(if $(filter ref,$(BACKEND_CONFIG)),,$(error Unsupported backend config '$(BACKEND_CONFIG)'. \ + Supported config for '${PLAT}' is 'ref')) + COMPILERs = gnu clang + COMPILER ?= gnu +endif $(if $(filter $(COMPILERs),$(COMPILER)),,$(error COMPILER must be one of $(COMPILERs))) -CPUs := sse2 sse42 avx2 avx512 -CPUs.files := nrh neh hsw skx +ifeq ($(ARCH_is_32e),yes) + CPUs := sse2 sse42 avx2 avx512 + CPUs.files := nrh neh hsw skx +else + CPUs := sve + CPUs.files := a8sve +endif USERREQCPU := $(filter-out $(filter $(CPUs),$(REQCPU)),$(REQCPU)) USECPUS := $(if $(REQCPU),$(if $(USERREQCPU),$(error Unsupported value/s in REQCPU: $(USERREQCPU). 
List of supported CPUs: $(CPUs)),$(REQCPU)),$(CPUs)) -USECPUS := $(if $(filter sse2,$(USECPUS)),$(USECPUS),sse2 $(USECPUS)) +ifeq ($(ARCH_is_32e),yes) + USECPUS := $(if $(filter sse2,$(USECPUS)),$(USECPUS),sse2 $(USECPUS)) +else + USECPUS := $(if $(filter sve,$(USECPUS)),$(USECPUS),sve $(USECPUS)) +endif $(info Selected list of CPUs - USECPUS: $(USECPUS)) @@ -91,7 +127,7 @@ endif DPC.COMPILE.gcc_toolchain := $(GCC_TOOLCHAIN_PATH) endif -include dev/make/cmplr.$(COMPILER).$(BACKEND_CONFIG).mk +include dev/make/cmplr.$(COMPILER).$(BACKEND_CONFIG).$(ARCH).mk include dev/make/cmplr.dpcpp.mk $(if $(filter $(PLATs.$(COMPILER)),$(PLAT)),,$(error PLAT for $(COMPILER) must be defined to one of $(PLATs.$(COMPILER)))) @@ -133,21 +169,36 @@ y := $(notdir $(filter $(_OS)/%,lnx/so win/dll mac/dylib)) -eGRP = $(if $(OS_is_lnx),-Wl$(comma)--end-group,) daalmake = make -p4_OPT := $(p4_OPT.$(COMPILER)) -mc3_OPT := $(mc3_OPT.$(COMPILER)) -avx2_OPT := $(avx2_OPT.$(COMPILER)) -skx_OPT := $(skx_OPT.$(COMPILER)) +ifeq ($(ARCH_is_32e),yes) + p4_OPT := $(p4_OPT.$(COMPILER)) + mc3_OPT := $(mc3_OPT.$(COMPILER)) + avx2_OPT := $(avx2_OPT.$(COMPILER)) + skx_OPT := $(skx_OPT.$(COMPILER)) +else + a8sve_OPT := $(a8sve_OPT.$(COMPILER)) +endif _OSr := $(if $(OS_is_win),win,$(if $(OS_is_lnx),lin,)) -USECPUS.files := $(subst sse2,nrh,$(subst sse42,neh,$(subst avx2,hsw,$(subst avx512,skx,$(USECPUS))))) +ifeq ($(ARCH_is_32e),yes) + USECPUS.files := $(subst sse2,nrh,$(subst sse42,neh,$(subst avx2,hsw,$(subst avx512,skx,$(USECPUS))))) +else + USECPUS.files := $(subst sve,a8sve,$(USECPUS)) +endif + USECPUS.out := $(filter-out $(USECPUS),$(CPUs)) USECPUS.out.for.grep.filter := $(addprefix _,$(addsuffix _,$(subst $(space),_|_,$(USECPUS.out)))) USECPUS.out.grep.filter := $(if $(USECPUS.out),| grep -v -E '$(USECPUS.out.for.grep.filter)') -USECPUS.out.defs := $(subst sse2,^\#define DAAL_KERNEL_SSE2$(sed.eow),\ - $(subst sse42,^\#define DAAL_KERNEL_SSE42$(sed.eow),\ - $(subst avx2,^\#define 
DAAL_KERNEL_AVX2$(sed.eow),\ - $(subst avx512,^\#define DAAL_KERNEL_AVX512$(sed.eow),$(USECPUS.out))))) + +ifeq ($(ARCH_is_32e),yes) + USECPUS.out.defs := $(subst sse2,^\#define DAAL_KERNEL_SSE2$(sed.eow),\ + $(subst sse42,^\#define DAAL_KERNEL_SSE42$(sed.eow),\ + $(subst avx2,^\#define DAAL_KERNEL_AVX2$(sed.eow),\ + $(subst avx512,^\#define DAAL_KERNEL_AVX512$(sed.eow),$(USECPUS.out))))) +else + USECPUS.out.defs := $(subst sve,^\#define DAAL_KERNEL_SVE$(sed.eow),$(USECPUS.out)) +endif + USECPUS.out.defs := $(subst $(space)^,|^,$(strip $(USECPUS.out.defs))) USECPUS.out.defs.filter := $(if $(USECPUS.out.defs),sed $(sed.-b) $(sed.-i) -E -e 's/$(USECPUS.out.defs)/$(sed.eol)/') @@ -355,6 +406,11 @@ daaldep.lnx32e.rt.seq := -lpthread $(daaldep.lnx32e.rt.$(COMPILER)) $(if $(COV.l daaldep.lnx32e.rt.dpc := -lpthread -lOpenCL $(if $(COV.libia),$(COV.libia)/libcov.a) daaldep.lnx32e.threxport := export_lnx32e.$(BACKEND_CONFIG).def +daaldep.lnxarm.rt.thr := -L$(TBBDIR.soia.lnx) -ltbb -ltbbmalloc -lpthread $(daaldep.lnxarm.rt.$(COMPILER)) $(if $(COV.libia),$(COV.libia)/libcov.a) +daaldep.lnxarm.rt.seq := -lpthread $(daaldep.lnxarm.rt.$(COMPILER)) $(if $(COV.libia),$(COV.libia)/libcov.a) +daaldep.lnxarm.rt.dpc := -lpthread -lOpenCL $(if $(COV.libia),$(COV.libia)/libcov.a) +daaldep.lnxarm.threxport := export_lnxarm.$(BACKEND_CONFIG).def + daaldep.lnx.threxport.create = grep -v -E '^(EXPORTS|;|$$)' $< $(USECPUS.out.grep.filter) | sed -e 's/^/-u /' daaldep.win32e.rt.thr := -LIBPATH:$(RELEASEDIR.tbb.libia) $(dep_thr) $(if $(CHECK_DLL_SIG),Wintrust.lib) @@ -514,10 +570,14 @@ $(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt $(filter %threading.$o, $(CORE.objs_a)): COPT += -D__DO_TBB_LAYER__ + +ifeq ($(ARCH_is_32e),yes) $(call containing,_nrh, $(CORE.objs_a)): COPT += $(p4_OPT) -DDAAL_CPU=sse2 $(call containing,_neh, $(CORE.objs_a)): COPT += $(mc3_OPT) 
-DDAAL_CPU=sse42 $(call containing,_hsw, $(CORE.objs_a)): COPT += $(avx2_OPT) -DDAAL_CPU=avx2 $(call containing,_skx, $(CORE.objs_a)): COPT += $(skx_OPT) -DDAAL_CPU=avx512 +endif + $(call containing,_flt, $(CORE.objs_a)): COPT += -DDAAL_FPTYPE=float $(call containing,_dbl, $(CORE.objs_a)): COPT += -DDAAL_FPTYPE=double @@ -529,10 +589,14 @@ $(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_y): COPT += @$(CORE.tmpdir_y)/inc_y_folders.txt $(filter %threading.$o, $(CORE.objs_y)): COPT += -D__DO_TBB_LAYER__ + +ifeq ($(ARCH_is_32e),yes) $(call containing,_nrh, $(CORE.objs_y)): COPT += $(p4_OPT) -DDAAL_CPU=sse2 $(call containing,_neh, $(CORE.objs_y)): COPT += $(mc3_OPT) -DDAAL_CPU=sse42 $(call containing,_hsw, $(CORE.objs_y)): COPT += $(avx2_OPT) -DDAAL_CPU=avx2 $(call containing,_skx, $(CORE.objs_y)): COPT += $(skx_OPT) -DDAAL_CPU=avx512 +endif + $(call containing,_flt, $(CORE.objs_y)): COPT += -DDAAL_FPTYPE=float $(call containing,_dbl, $(CORE.objs_y)): COPT += -DDAAL_FPTYPE=double @@ -550,10 +614,16 @@ define .compile.template.ay $(eval template_source_cpp := $(subst .$o,.cpp,$(notdir $1))) $(eval template_source_cpp := $(subst _fpt_flt,_fpt,$(template_source_cpp))) $(eval template_source_cpp := $(subst _fpt_dbl,_fpt,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_nrh,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_neh,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_hsw,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_skx,_cpu,$(template_source_cpp))) + +ifeq ($(ARCH_is_32e),yes) + $(eval template_source_cpp := $(subst _cpu_nrh,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_neh,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_hsw,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_skx,_cpu,$(template_source_cpp))) +else + $(eval 
template_source_cpp := $(subst _cpu_a8sve,_cpu,$(template_source_cpp))) +endif + $1: $(template_source_cpp) ; $(value C.COMPILE) endef @@ -582,10 +652,15 @@ ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend.incdir) $ ONEAPI.incdirs := $(ONEAPI.incdirs.common) $(CORE.incdirs.thirdp) $(ONEAPI.incdirs.thirdp) ONEAPI.dispatcher_cpu = $(WORKDIR)/oneapi/dal/_dal_cpu_dispatcher_gen.hpp -ONEAPI.dispatcher_tag.nrh := -D__CPU_TAG__=__CPU_TAG_SSE2__ -ONEAPI.dispatcher_tag.neh := -D__CPU_TAG__=__CPU_TAG_SSE42__ -ONEAPI.dispatcher_tag.hsw := -D__CPU_TAG__=__CPU_TAG_AVX2__ -ONEAPI.dispatcher_tag.skx := -D__CPU_TAG__=__CPU_TAG_AVX512__ + +ifeq ($(ARCH_is_32e),yes) + ONEAPI.dispatcher_tag.nrh := -D__CPU_TAG__=__CPU_TAG_SSE2__ + ONEAPI.dispatcher_tag.neh := -D__CPU_TAG__=__CPU_TAG_SSE42__ + ONEAPI.dispatcher_tag.hsw := -D__CPU_TAG__=__CPU_TAG_AVX2__ + ONEAPI.dispatcher_tag.skx := -D__CPU_TAG__=__CPU_TAG_AVX512__ +else + ONEAPI.dispatcher_tag.a8sve := -D__CPU_TAG__=__CPU_TAG_ARMV8SVE__ +endif ONEAPI.srcdir := $(CPPDIR.onedal) ONEAPI.srcdirs.base := $(ONEAPI.srcdir) \ @@ -628,10 +703,16 @@ ONEAPI.objs_y.all := $(ONEAPI.objs_y) $(ONEAPI.objs_y.dpc) define .populate_cpus $(eval non_cpu_files := $(call notcontaining,_cpu,$2)) $(eval cpu_files := $(call containing,_cpu,$2)) -$(eval nrh_files := $(subst _nrh,_cpu_nrh,$(call containing,_nrh,$(non_cpu_files)))) -$(eval neh_files := $(subst _neh,_cpu_neh,$(call containing,_neh,$(non_cpu_files)))) -$(eval hsw_files := $(subst _hsw,_cpu_hsw,$(call containing,_hsw,$(non_cpu_files)))) -$(eval skx_files := $(subst _skx,_cpu_skx,$(call containing,_skx,$(non_cpu_files)))) + +ifeq ($(ARCH_is_32e),yes) + $(eval nrh_files := $(subst _nrh,_cpu_nrh,$(call containing,_nrh,$(non_cpu_files)))) + $(eval neh_files := $(subst _neh,_cpu_neh,$(call containing,_neh,$(non_cpu_files)))) + $(eval hsw_files := $(subst _hsw,_cpu_hsw,$(call containing,_hsw,$(non_cpu_files)))) + $(eval skx_files := $(subst _skx,_cpu_skx,$(call 
containing,_skx,$(non_cpu_files)))) +else + $(eval a8sve_files := $(subst _a8sve,_cpu_a8sve,$(call containing,_a8sve,$(non_cpu_files)))) +endif + $(eval user_cpu_files := $(nrh_files) $(neh_files) $(hsw_files) $(skx_files)) $(eval populated_cpu_files := $(foreach ccc,$(USECPUS.files),$(subst _cpu,_cpu_$(ccc),$(cpu_files)))) $(eval populated_cpu_files := $(filter-out $(user_cpu_files),$(populated_cpu_files))) @@ -655,10 +736,16 @@ $(eval $(call .populate_cpus,ONEAPI.objs_y.dpc,$(ONEAPI.objs_y.dpc))) define .ONEAPI.compile $(eval template_source_cpp := $(1:$2/%.$o=%.cpp)) $(eval template_source_cpp := $(subst -,/,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_nrh,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_neh,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_hsw,_cpu,$(template_source_cpp))) -$(eval template_source_cpp := $(subst _cpu_skx,_cpu,$(template_source_cpp))) + +ifeq ($(ARCH_is_32e),yes) + $(eval template_source_cpp := $(subst _cpu_nrh,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_neh,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_hsw,_cpu,$(template_source_cpp))) + $(eval template_source_cpp := $(subst _cpu_skx,_cpu,$(template_source_cpp))) +else + $(eval template_source_cpp := $(subst _cpu_a8sve,_cpu,$(template_source_cpp))) +endif + $1: $(template_source_cpp) | $(dir $1)/. ; $(value $3.COMPILE) endef @@ -672,9 +759,13 @@ $1: $(1:%.$a=%_link.txt) | $(dir $1)/. ; $(value LINK.STATIC) endef $(ONEAPI.dispatcher_cpu): | $(dir $(ONEAPI.dispatcher_cpu))/. 
+ifeq ($(ARCH_is_32e),yes) $(if $(filter sse42,$(USECPUS)),echo "#define ONEDAL_CPU_DISPATCH_SSE42" >> $@) $(if $(filter avx2,$(USECPUS)),echo "#define ONEDAL_CPU_DISPATCH_AVX2" >> $@) $(if $(filter avx512,$(USECPUS)),echo "#define ONEDAL_CPU_DISPATCH_AVX512" >> $@) +else + $(if $(filter sve,$(USECPUS)),echo "#define ONEDAL_CPU_DISPATCH_A8SVE" >> $@) +endif # Create file with include paths ONEAPI.include_options := $(addprefix -I, $(ONEAPI.incdirs.common)) \ @@ -701,10 +792,14 @@ $(ONEAPI.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic -D__TBB_NO_IMPLICIT_LINKAGE \ -DTBB_USE_ASSERT=0 \ @$(ONEAPI.tmpdir_a)/inc_a_folders.txt -$(call containing,_nrh, $(ONEAPI.objs_a)): COPT += $(p4_OPT) $(ONEAPI.dispatcher_tag.nrh) -$(call containing,_neh, $(ONEAPI.objs_a)): COPT += $(mc3_OPT) $(ONEAPI.dispatcher_tag.neh) -$(call containing,_hsw, $(ONEAPI.objs_a)): COPT += $(avx2_OPT) $(ONEAPI.dispatcher_tag.hsw) -$(call containing,_skx, $(ONEAPI.objs_a)): COPT += $(skx_OPT) $(ONEAPI.dispatcher_tag.skx) +ifeq ($(ARCH_is_32e),yes) + $(call containing,_nrh, $(ONEAPI.objs_a)): COPT += $(p4_OPT) $(ONEAPI.dispatcher_tag.nrh) + $(call containing,_neh, $(ONEAPI.objs_a)): COPT += $(mc3_OPT) $(ONEAPI.dispatcher_tag.neh) + $(call containing,_hsw, $(ONEAPI.objs_a)): COPT += $(avx2_OPT) $(ONEAPI.dispatcher_tag.hsw) + $(call containing,_skx, $(ONEAPI.objs_a)): COPT += $(skx_OPT) $(ONEAPI.dispatcher_tag.skx) +else + $(call containing,_a8sve, $(ONEAPI.objs_a)): COPT += $(a8sve_OPT) $(ONEAPI.dispatcher_tag.a8sve) +endif $(ONEAPI.objs_a.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_a.dpc)/inc_a_folders.txt $(ONEAPI.objs_a.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ @@ -716,10 +811,14 @@ $(ONEAPI.objs_a.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.op -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ -DTBB_USE_ASSERT=0 \ @$(ONEAPI.tmpdir_a.dpc)/inc_a_folders.txt -$(call containing,_nrh, $(ONEAPI.objs_a.dpc)): COPT += $(p4_OPT.dpcpp) 
$(ONEAPI.dispatcher_tag.nrh) -$(call containing,_neh, $(ONEAPI.objs_a.dpc)): COPT += $(mc3_OPT.dpcpp) $(ONEAPI.dispatcher_tag.neh) -$(call containing,_hsw, $(ONEAPI.objs_a.dpc)): COPT += $(avx2_OPT.dpcpp) $(ONEAPI.dispatcher_tag.hsw) -$(call containing,_skx, $(ONEAPI.objs_a.dpc)): COPT += $(skx_OPT.dpcpp) $(ONEAPI.dispatcher_tag.skx) +ifeq ($(ARCH_is_32e),yes) + $(call containing,_nrh, $(ONEAPI.objs_a.dpc)): COPT += $(p4_OPT.dpcpp) $(ONEAPI.dispatcher_tag.nrh) + $(call containing,_neh, $(ONEAPI.objs_a.dpc)): COPT += $(mc3_OPT.dpcpp) $(ONEAPI.dispatcher_tag.neh) + $(call containing,_hsw, $(ONEAPI.objs_a.dpc)): COPT += $(avx2_OPT.dpcpp) $(ONEAPI.dispatcher_tag.hsw) + $(call containing,_skx, $(ONEAPI.objs_a.dpc)): COPT += $(skx_OPT.dpcpp) $(ONEAPI.dispatcher_tag.skx) +else + $(call containing,_a8sve, $(ONEAPI.objs_a.dpc)): COPT += $(a8sve_OPT.dpcpp) $(ONEAPI.dispatcher_tag.a8sve) +endif # Set compilation options to the object files which are part of DYNAMIC lib $(ONEAPI.objs_y): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y)/inc_y_folders.txt @@ -732,10 +831,14 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic -D__TBB_NO_IMPLICIT_LINKAGE \ -DTBB_USE_ASSERT=0 \ @$(ONEAPI.tmpdir_y)/inc_y_folders.txt -$(call containing,_nrh, $(ONEAPI.objs_y)): COPT += $(p4_OPT) $(ONEAPI.dispatcher_tag.nrh) -$(call containing,_neh, $(ONEAPI.objs_y)): COPT += $(mc3_OPT) $(ONEAPI.dispatcher_tag.neh) -$(call containing,_hsw, $(ONEAPI.objs_y)): COPT += $(avx2_OPT) $(ONEAPI.dispatcher_tag.hsw) -$(call containing,_skx, $(ONEAPI.objs_y)): COPT += $(skx_OPT) $(ONEAPI.dispatcher_tag.skx) +ifeq ($(ARCH_is_32e),yes) + $(call containing,_nrh, $(ONEAPI.objs_y)): COPT += $(p4_OPT) $(ONEAPI.dispatcher_tag.nrh) + $(call containing,_neh, $(ONEAPI.objs_y)): COPT += $(mc3_OPT) $(ONEAPI.dispatcher_tag.neh) + $(call containing,_hsw, $(ONEAPI.objs_y)): COPT += $(avx2_OPT) $(ONEAPI.dispatcher_tag.hsw) + $(call containing,_skx, $(ONEAPI.objs_y)): COPT += $(skx_OPT) 
$(ONEAPI.dispatcher_tag.skx) +else + $(call containing,_a8sve, $(ONEAPI.objs_y)): COPT += $(a8sve_OPT) $(ONEAPI.dispatcher_tag.a8sve) +endif $(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt $(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ @@ -749,10 +852,14 @@ $(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.op -D__TBB_NO_IMPLICIT_LINKAGE \ -DTBB_USE_ASSERT=0 \ @$(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(call containing,_nrh, $(ONEAPI.objs_y.dpc)): COPT += $(p4_OPT.dpcpp) $(ONEAPI.dispatcher_tag.nrh) -$(call containing,_neh, $(ONEAPI.objs_y.dpc)): COPT += $(mc3_OPT.dpcpp) $(ONEAPI.dispatcher_tag.neh) -$(call containing,_hsw, $(ONEAPI.objs_y.dpc)): COPT += $(avx2_OPT.dpcpp) $(ONEAPI.dispatcher_tag.hsw) -$(call containing,_skx, $(ONEAPI.objs_y.dpc)): COPT += $(skx_OPT.dpcpp) $(ONEAPI.dispatcher_tag.skx) +ifeq ($(ARCH_is_32e),yes) + $(call containing,_nrh, $(ONEAPI.objs_y.dpc)): COPT += $(p4_OPT.dpcpp) $(ONEAPI.dispatcher_tag.nrh) + $(call containing,_neh, $(ONEAPI.objs_y.dpc)): COPT += $(mc3_OPT.dpcpp) $(ONEAPI.dispatcher_tag.neh) + $(call containing,_hsw, $(ONEAPI.objs_y.dpc)): COPT += $(avx2_OPT.dpcpp) $(ONEAPI.dispatcher_tag.hsw) + $(call containing,_skx, $(ONEAPI.objs_y.dpc)): COPT += $(skx_OPT.dpcpp) $(ONEAPI.dispatcher_tag.skx) +else + $(call containing,_a8sve, $(ONEAPI.objs_y.dpc)): COPT += $(a8sve_OPT.dpcpp) $(ONEAPI.dispatcher_tag.a8sve) +endif # Filtering parameter files PARAMETERS.objs_a.filtered := $(filter %parameters.$(o),$(ONEAPI.objs_a)) @@ -1122,8 +1229,15 @@ $(foreach t,$(releasetbb.LIBS_Y),$(eval $(call .release.t,$t,$(RELEASEDIR.tbb.so $(foreach t,$(releasetbb.LIBS_A),$(eval $(call .release.t,$t,$(RELEASEDIR.tbb.libia)))) #----- cmake configs generation +ARCH_DIR_ONEDAL= +ifeq ($(ARCH),32e) + ARCH_DIR_ONEDAL=intel64 +else + ARCH_DIR_ONEDAL=arm +endif + _release_cmake_configs: - $(if $(shell bash -c "command -v cmake"),cmake 
-DINSTALL_DIR=$(RELEASEDIR.lib)/cmake/oneDAL -P cmake/scripts/generate_config.cmake,echo 'cmake configs generation skipped') + $(if $(shell bash -c "command -v cmake"),cmake -DINSTALL_DIR=$(RELEASEDIR.lib)/cmake/oneDAL -DARCH_DIR_ONEDAL=$(ARCH_DIR_ONEDAL) -P cmake/scripts/generate_config.cmake,echo 'cmake configs generation skipped') #----- nuspecs generation _release_common: _release_nuspec