diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index ea72c595a9b807..e5b52db175d960 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -271,7 +271,7 @@ endif() # These files are used on 32-bit and 64-bit x86. set(x86_ARCH_SOURCES - cpu_model.c + cpu_model/x86.c ) if (NOT MSVC) @@ -556,7 +556,7 @@ endif() set(aarch64_SOURCES ${GENERIC_TF_SOURCES} ${GENERIC_SOURCES} - cpu_model.c + cpu_model/aarch64.c aarch64/fp_mode.c ) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c new file mode 100644 index 00000000000000..3ed2a85e3413a9 --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -0,0 +1,143 @@ +//===-- cpu_model/aarch64.c - Support for __cpu_model builtin ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is based on LLVM's lib/Support/Host.cpp. +// It implements __aarch64_have_lse_atomics, __aarch64_cpu_features for +// AArch64. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "cpu_model.h"
+
+#if !defined(__aarch64__)
+#error This file is intended only for aarch64-based targets
+#endif
+
+#if __has_include(<sys/ifunc.h>)
+#include <sys/ifunc.h>
+#else
+typedef struct __ifunc_arg_t {
+  unsigned long _size;
+  unsigned long _hwcap;
+  unsigned long _hwcap2;
+} __ifunc_arg_t;
+#endif // __has_include(<sys/ifunc.h>)
+
+// LSE support detection for out-of-line atomics
+// using HWCAP and Auxiliary vector
+_Bool __aarch64_have_lse_atomics
+    __attribute__((visibility("hidden"), nocommon)) = false;
+
+#if defined(__FreeBSD__)
+#include "aarch64/lse_atomics/freebsd.inc"
+#elif defined(__Fuchsia__)
+#include "aarch64/lse_atomics/fuchsia.inc"
+#elif defined(__ANDROID__)
+#include "aarch64/lse_atomics/android.inc"
+#elif __has_include(<sys/auxv.h>)
+#include "aarch64/lse_atomics/sysauxv.inc"
+#else
+// When unimplemented, we leave __aarch64_have_lse_atomics initialized to false.
+#endif
+
+#if !defined(DISABLE_AARCH64_FMV)
+// CPUFeatures must correspond to the same AArch64 features in
+// AArch64TargetParser.h
+enum CPUFeatures {
+  FEAT_RNG,
+  FEAT_FLAGM,
+  FEAT_FLAGM2,
+  FEAT_FP16FML,
+  FEAT_DOTPROD,
+  FEAT_SM4,
+  FEAT_RDM,
+  FEAT_LSE,
+  FEAT_FP,
+  FEAT_SIMD,
+  FEAT_CRC,
+  FEAT_SHA1,
+  FEAT_SHA2,
+  FEAT_SHA3,
+  FEAT_AES,
+  FEAT_PMULL,
+  FEAT_FP16,
+  FEAT_DIT,
+  FEAT_DPB,
+  FEAT_DPB2,
+  FEAT_JSCVT,
+  FEAT_FCMA,
+  FEAT_RCPC,
+  FEAT_RCPC2,
+  FEAT_FRINTTS,
+  FEAT_DGH,
+  FEAT_I8MM,
+  FEAT_BF16,
+  FEAT_EBF16,
+  FEAT_RPRES,
+  FEAT_SVE,
+  FEAT_SVE_BF16,
+  FEAT_SVE_EBF16,
+  FEAT_SVE_I8MM,
+  FEAT_SVE_F32MM,
+  FEAT_SVE_F64MM,
+  FEAT_SVE2,
+  FEAT_SVE_AES,
+  FEAT_SVE_PMULL128,
+  FEAT_SVE_BITPERM,
+  FEAT_SVE_SHA3,
+  FEAT_SVE_SM4,
+  FEAT_SME,
+  FEAT_MEMTAG,
+  FEAT_MEMTAG2,
+  FEAT_MEMTAG3,
+  FEAT_SB,
+  FEAT_PREDRES,
+  FEAT_SSBS,
+  FEAT_SSBS2,
+  FEAT_BTI,
+  FEAT_LS64,
+  FEAT_LS64_V,
+  FEAT_LS64_ACCDATA,
+  FEAT_WFXT,
+  FEAT_SME_F64,
+  FEAT_SME_I64,
+  FEAT_SME2,
+  FEAT_RCPC3,
+  FEAT_MAX,
+  FEAT_EXT = 62, // Reserved to indicate presence of additional
features field
+                 // in __aarch64_cpu_features
+  FEAT_INIT      // Used as flag of features initialization completion
+};
+
+// Architecture features used
+// in Function Multi Versioning
+struct {
+  unsigned long long features;
+  // As features grows new fields could be added
+} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
+
+// The formatter wants to re-order these includes, but doing so is incorrect:
+// clang-format off
+#if defined(__FreeBSD__)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/freebsd.inc"
+#elif defined(__Fuchsia__)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/fuchsia.inc"
+#elif defined(__ANDROID__)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/android.inc"
+#elif __has_include(<sys/auxv.h>)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/sysauxv.inc"
+#else
+#include "aarch64/fmv/unimplemented.inc"
+#endif
+// clang-format on
+
+#endif // !defined(DISABLE_AARCH64_FMV)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
new file mode 100644
index 00000000000000..a80186ba064d41
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
@@ -0,0 +1,33 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+    return;
+
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower
+  // than 30. If so, set feature detection done and keep all CPU features
+  // unsupported (zeros). To detect this case in runtime we check existence
+  // of memfd_create function from Standard C library which was introduced in
+  // Android API 30.
+  int memfd_create(const char *, unsigned int) __attribute__((weak));
+  if (!memfd_create)
+    return;
+
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  // CPU features already initialized.
+  if (__aarch64_cpu_features.features)
+    return;
+
+  // Don't set any CPU features,
+  // detection could be wrong on Exynos 9810.
+  if (__isExynos9810())
+    return;
+
+  __ifunc_arg_t arg;
+  arg._size = sizeof(__ifunc_arg_t);
+  arg._hwcap = getauxval(AT_HWCAP);
+  arg._hwcap2 = getauxval(AT_HWCAP2);
+  __init_cpu_features_constructor(arg._hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
new file mode 100644
index 00000000000000..793adef44b9365
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
@@ -0,0 +1,27 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+    return;
+
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  unsigned long hwcap = 0;
+  unsigned long hwcap2 = 0;
+  // CPU features already initialized.
+  if (__aarch64_cpu_features.features)
+    return;
+
+  int res = 0;
+  res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+  res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
+  if (res)
+    return;
+
+  __ifunc_arg_t arg;
+  arg._size = sizeof(__ifunc_arg_t);
+  arg._hwcap = hwcap;
+  arg._hwcap2 = hwcap2;
+  __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
new file mode 100644
index 00000000000000..7db1dbd0852d84
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
@@ -0,0 +1,19 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+    return;
+
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  // CPU features already initialized.
+  if (__aarch64_cpu_features.features)
+    return;
+
+  __ifunc_arg_t arg;
+  arg._size = sizeof(__ifunc_arg_t);
+  arg._hwcap = getauxval(AT_HWCAP);
+  arg._hwcap2 = getauxval(AT_HWCAP2);
+  __init_cpu_features_constructor(arg._hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
new file mode 100644
index 00000000000000..282bda11566ec7
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -0,0 +1,375 @@
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#define HAVE_SYS_AUXV_H
+#endif
+
+#if __has_include(<asm/hwcap.h>)
+#include <asm/hwcap.h>
+#define HAVE_SYS_HWCAP_H
+#endif
+
+#ifndef _IFUNC_ARG_HWCAP
+#define _IFUNC_ARG_HWCAP (1ULL << 62)
+#endif
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+#ifndef HWCAP_CPUID
+#define HWCAP_CPUID (1 << 11)
+#endif
+#ifndef HWCAP_FP
+#define HWCAP_FP (1 << 0)
+#endif
+#ifndef HWCAP_ASIMD
+#define HWCAP_ASIMD (1 << 1)
+#endif
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#ifndef HWCAP_SHA1
+#define HWCAP_SHA1 (1 << 5)
+#endif
+#ifndef HWCAP_SHA2
+#define HWCAP_SHA2 (1 << 6)
+#endif
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif
+#ifndef HWCAP_FPHP
+#define HWCAP_FPHP (1 << 9)
+#endif
+#ifndef HWCAP_ASIMDHP
+#define HWCAP_ASIMDHP (1 << 10)
+#endif
+#ifndef HWCAP_ASIMDRDM
+#define HWCAP_ASIMDRDM (1 << 12)
+#endif
+#ifndef HWCAP_JSCVT
+#define HWCAP_JSCVT (1 << 13)
+#endif
+#ifndef HWCAP_FCMA
+#define HWCAP_FCMA (1 << 14)
+#endif
+#ifndef HWCAP_LRCPC
+#define HWCAP_LRCPC (1 << 15)
+#endif
+#ifndef HWCAP_DCPOP
+#define HWCAP_DCPOP (1 << 16)
+#endif
+#ifndef HWCAP_SHA3
+#define HWCAP_SHA3 (1 << 17)
+#endif
+#ifndef HWCAP_SM3
+#define HWCAP_SM3 (1 << 18)
+#endif
+#ifndef HWCAP_SM4
+#define HWCAP_SM4 (1 << 19)
+#endif
+#ifndef HWCAP_ASIMDDP
+#define HWCAP_ASIMDDP (1 << 20)
+#endif
+#ifndef HWCAP_SHA512
+#define
HWCAP_SHA512 (1 << 21) +#endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif +#ifndef HWCAP_ASIMDFHM +#define HWCAP_ASIMDFHM (1 << 23) +#endif +#ifndef HWCAP_DIT +#define HWCAP_DIT (1 << 24) +#endif +#ifndef HWCAP_ILRCPC +#define HWCAP_ILRCPC (1 << 26) +#endif +#ifndef HWCAP_FLAGM +#define HWCAP_FLAGM (1 << 27) +#endif +#ifndef HWCAP_SSBS +#define HWCAP_SSBS (1 << 28) +#endif +#ifndef HWCAP_SB +#define HWCAP_SB (1 << 29) +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif +#ifndef HWCAP2_DCPODP +#define HWCAP2_DCPODP (1 << 0) +#endif +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 (1 << 1) +#endif +#ifndef HWCAP2_SVEAES +#define HWCAP2_SVEAES (1 << 2) +#endif +#ifndef HWCAP2_SVEPMULL +#define HWCAP2_SVEPMULL (1 << 3) +#endif +#ifndef HWCAP2_SVEBITPERM +#define HWCAP2_SVEBITPERM (1 << 4) +#endif +#ifndef HWCAP2_SVESHA3 +#define HWCAP2_SVESHA3 (1 << 5) +#endif +#ifndef HWCAP2_SVESM4 +#define HWCAP2_SVESM4 (1 << 6) +#endif +#ifndef HWCAP2_FLAGM2 +#define HWCAP2_FLAGM2 (1 << 7) +#endif +#ifndef HWCAP2_FRINT +#define HWCAP2_FRINT (1 << 8) +#endif +#ifndef HWCAP2_SVEI8MM +#define HWCAP2_SVEI8MM (1 << 9) +#endif +#ifndef HWCAP2_SVEF32MM +#define HWCAP2_SVEF32MM (1 << 10) +#endif +#ifndef HWCAP2_SVEF64MM +#define HWCAP2_SVEF64MM (1 << 11) +#endif +#ifndef HWCAP2_SVEBF16 +#define HWCAP2_SVEBF16 (1 << 12) +#endif +#ifndef HWCAP2_I8MM +#define HWCAP2_I8MM (1 << 13) +#endif +#ifndef HWCAP2_BF16 +#define HWCAP2_BF16 (1 << 14) +#endif +#ifndef HWCAP2_DGH +#define HWCAP2_DGH (1 << 15) +#endif +#ifndef HWCAP2_RNG +#define HWCAP2_RNG (1 << 16) +#endif +#ifndef HWCAP2_BTI +#define HWCAP2_BTI (1 << 17) +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif +#ifndef HWCAP2_RPRES +#define HWCAP2_RPRES (1 << 21) +#endif +#ifndef HWCAP2_MTE3 +#define HWCAP2_MTE3 (1 << 22) +#endif +#ifndef HWCAP2_SME +#define HWCAP2_SME (1 << 23) +#endif +#ifndef HWCAP2_SME_I16I64 +#define HWCAP2_SME_I16I64 (1 << 24) +#endif +#ifndef HWCAP2_SME_F64F64 +#define HWCAP2_SME_F64F64 (1 << 
25) +#endif +#ifndef HWCAP2_WFXT +#define HWCAP2_WFXT (1UL << 31) +#endif +#ifndef HWCAP2_EBF16 +#define HWCAP2_EBF16 (1ULL << 32) +#endif +#ifndef HWCAP2_SVE_EBF16 +#define HWCAP2_SVE_EBF16 (1ULL << 33) +#endif + +static void __init_cpu_features_constructor(unsigned long hwcap, + const __ifunc_arg_t *arg) { +#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F +#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) +#define extractBits(val, start, number) \ + (val & ((1ULL << number) - 1ULL) << start) >> start + unsigned long hwcap2 = 0; + if (hwcap & _IFUNC_ARG_HWCAP) + hwcap2 = arg->_hwcap2; + if (hwcap & HWCAP_CRC32) + setCPUFeature(FEAT_CRC); + if (hwcap & HWCAP_PMULL) + setCPUFeature(FEAT_PMULL); + if (hwcap & HWCAP_FLAGM) + setCPUFeature(FEAT_FLAGM); + if (hwcap2 & HWCAP2_FLAGM2) { + setCPUFeature(FEAT_FLAGM); + setCPUFeature(FEAT_FLAGM2); + } + if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) + setCPUFeature(FEAT_SM4); + if (hwcap & HWCAP_ASIMDDP) + setCPUFeature(FEAT_DOTPROD); + if (hwcap & HWCAP_ASIMDFHM) + setCPUFeature(FEAT_FP16FML); + if (hwcap & HWCAP_FPHP) { + setCPUFeature(FEAT_FP16); + setCPUFeature(FEAT_FP); + } + if (hwcap & HWCAP_DIT) + setCPUFeature(FEAT_DIT); + if (hwcap & HWCAP_ASIMDRDM) + setCPUFeature(FEAT_RDM); + if (hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC2); + if (hwcap & HWCAP_AES) + setCPUFeature(FEAT_AES); + if (hwcap & HWCAP_SHA1) + setCPUFeature(FEAT_SHA1); + if (hwcap & HWCAP_SHA2) + setCPUFeature(FEAT_SHA2); + if (hwcap & HWCAP_JSCVT) + setCPUFeature(FEAT_JSCVT); + if (hwcap & HWCAP_FCMA) + setCPUFeature(FEAT_FCMA); + if (hwcap & HWCAP_SB) + setCPUFeature(FEAT_SB); + if (hwcap & HWCAP_SSBS) + setCPUFeature(FEAT_SSBS2); + if (hwcap2 & HWCAP2_MTE) { + setCPUFeature(FEAT_MEMTAG); + setCPUFeature(FEAT_MEMTAG2); + } + if (hwcap2 & HWCAP2_MTE3) { + setCPUFeature(FEAT_MEMTAG); + setCPUFeature(FEAT_MEMTAG2); + setCPUFeature(FEAT_MEMTAG3); + } + if (hwcap2 & HWCAP2_SVEAES) + setCPUFeature(FEAT_SVE_AES); + if 
(hwcap2 & HWCAP2_SVEPMULL) { + setCPUFeature(FEAT_SVE_AES); + setCPUFeature(FEAT_SVE_PMULL128); + } + if (hwcap2 & HWCAP2_SVEBITPERM) + setCPUFeature(FEAT_SVE_BITPERM); + if (hwcap2 & HWCAP2_SVESHA3) + setCPUFeature(FEAT_SVE_SHA3); + if (hwcap2 & HWCAP2_SVESM4) + setCPUFeature(FEAT_SVE_SM4); + if (hwcap2 & HWCAP2_DCPODP) + setCPUFeature(FEAT_DPB2); + if (hwcap & HWCAP_ATOMICS) + setCPUFeature(FEAT_LSE); + if (hwcap2 & HWCAP2_RNG) + setCPUFeature(FEAT_RNG); + if (hwcap2 & HWCAP2_I8MM) + setCPUFeature(FEAT_I8MM); + if (hwcap2 & HWCAP2_EBF16) + setCPUFeature(FEAT_EBF16); + if (hwcap2 & HWCAP2_SVE_EBF16) + setCPUFeature(FEAT_SVE_EBF16); + if (hwcap2 & HWCAP2_DGH) + setCPUFeature(FEAT_DGH); + if (hwcap2 & HWCAP2_FRINT) + setCPUFeature(FEAT_FRINTTS); + if (hwcap2 & HWCAP2_SVEI8MM) + setCPUFeature(FEAT_SVE_I8MM); + if (hwcap2 & HWCAP2_SVEF32MM) + setCPUFeature(FEAT_SVE_F32MM); + if (hwcap2 & HWCAP2_SVEF64MM) + setCPUFeature(FEAT_SVE_F64MM); + if (hwcap2 & HWCAP2_BTI) + setCPUFeature(FEAT_BTI); + if (hwcap2 & HWCAP2_RPRES) + setCPUFeature(FEAT_RPRES); + if (hwcap2 & HWCAP2_WFXT) + setCPUFeature(FEAT_WFXT); + if (hwcap2 & HWCAP2_SME) + setCPUFeature(FEAT_SME); + if (hwcap2 & HWCAP2_SME_I16I64) + setCPUFeature(FEAT_SME_I64); + if (hwcap2 & HWCAP2_SME_F64F64) + setCPUFeature(FEAT_SME_F64); + if (hwcap & HWCAP_CPUID) { + unsigned long ftr; + getCPUFeature(ID_AA64PFR1_EL1, ftr); + // ID_AA64PFR1_EL1.MTE >= 0b0001 + if (extractBits(ftr, 8, 4) >= 0x1) + setCPUFeature(FEAT_MEMTAG); + // ID_AA64PFR1_EL1.SSBS == 0b0001 + if (extractBits(ftr, 4, 4) == 0x1) + setCPUFeature(FEAT_SSBS); + // ID_AA64PFR1_EL1.SME == 0b0010 + if (extractBits(ftr, 24, 4) == 0x2) + setCPUFeature(FEAT_SME2); + getCPUFeature(ID_AA64PFR0_EL1, ftr); + // ID_AA64PFR0_EL1.FP != 0b1111 + if (extractBits(ftr, 16, 4) != 0xF) { + setCPUFeature(FEAT_FP); + // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP + setCPUFeature(FEAT_SIMD); + } + // ID_AA64PFR0_EL1.SVE != 0b0000 + if (extractBits(ftr, 32, 4) 
!= 0x0) { + // get ID_AA64ZFR0_EL1, that name supported + // if sve enabled only + getCPUFeature(S3_0_C0_C4_4, ftr); + // ID_AA64ZFR0_EL1.SVEver == 0b0000 + if (extractBits(ftr, 0, 4) == 0x0) + setCPUFeature(FEAT_SVE); + // ID_AA64ZFR0_EL1.SVEver == 0b0001 + if (extractBits(ftr, 0, 4) == 0x1) + setCPUFeature(FEAT_SVE2); + // ID_AA64ZFR0_EL1.BF16 != 0b0000 + if (extractBits(ftr, 20, 4) != 0x0) + setCPUFeature(FEAT_SVE_BF16); + } + getCPUFeature(ID_AA64ISAR0_EL1, ftr); + // ID_AA64ISAR0_EL1.SHA3 != 0b0000 + if (extractBits(ftr, 32, 4) != 0x0) + setCPUFeature(FEAT_SHA3); + getCPUFeature(ID_AA64ISAR1_EL1, ftr); + // ID_AA64ISAR1_EL1.DPB >= 0b0001 + if (extractBits(ftr, 0, 4) >= 0x1) + setCPUFeature(FEAT_DPB); + // ID_AA64ISAR1_EL1.LRCPC != 0b0000 + if (extractBits(ftr, 20, 4) != 0x0) + setCPUFeature(FEAT_RCPC); + // ID_AA64ISAR1_EL1.LRCPC == 0b0011 + if (extractBits(ftr, 20, 4) == 0x3) + setCPUFeature(FEAT_RCPC3); + // ID_AA64ISAR1_EL1.SPECRES == 0b0001 + if (extractBits(ftr, 40, 4) == 0x2) + setCPUFeature(FEAT_PREDRES); + // ID_AA64ISAR1_EL1.BF16 != 0b0000 + if (extractBits(ftr, 44, 4) != 0x0) + setCPUFeature(FEAT_BF16); + // ID_AA64ISAR1_EL1.LS64 >= 0b0001 + if (extractBits(ftr, 60, 4) >= 0x1) + setCPUFeature(FEAT_LS64); + // ID_AA64ISAR1_EL1.LS64 >= 0b0010 + if (extractBits(ftr, 60, 4) >= 0x2) + setCPUFeature(FEAT_LS64_V); + // ID_AA64ISAR1_EL1.LS64 >= 0b0011 + if (extractBits(ftr, 60, 4) >= 0x3) + setCPUFeature(FEAT_LS64_ACCDATA); + } else { + // Set some features in case of no CPUID support + if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { + setCPUFeature(FEAT_FP); + // FP and AdvSIMD fields have the same value + setCPUFeature(FEAT_SIMD); + } + if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) + setCPUFeature(FEAT_DPB); + if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC); + if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) + setCPUFeature(FEAT_BF16); + if (hwcap2 & HWCAP2_SVEBF16) + setCPUFeature(FEAT_SVE_BF16); + if (hwcap2 & HWCAP2_SVE2 && 
hwcap & HWCAP_SVE)
+      setCPUFeature(FEAT_SVE2);
+    if (hwcap & HWCAP_SHA3)
+      setCPUFeature(FEAT_SHA3);
+  }
+  setCPUFeature(FEAT_INIT);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
new file mode 100644
index 00000000000000..52fb7ab4a0e530
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
@@ -0,0 +1,17 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+    return;
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  // CPU features already initialized.
+  if (__aarch64_cpu_features.features)
+    return;
+  __ifunc_arg_t arg;
+  arg._size = sizeof(__ifunc_arg_t);
+  arg._hwcap = getauxval(AT_HWCAP);
+  arg._hwcap2 = getauxval(AT_HWCAP2);
+  __init_cpu_features_constructor(arg._hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc
new file mode 100644
index 00000000000000..dc34624807b748
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc
@@ -0,0 +1,8 @@
+// On platforms that have not implemented this yet, we provide an implementation
+// that does not claim support for any features by leaving
+// __aarch64_cpu_features.features initialized to 0.
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {}
+
+void __init_cpu_features(void) {}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc
new file mode 100644
index 00000000000000..10d43db5f5d29e
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc
@@ -0,0 +1,29 @@
+#include <string.h>
+#include <sys/system_properties.h>
+
+// Returns nonzero when the "ro.arch" system property is set and starts with
+// "exynos9810"; callers use this to distrust the reported CPU features.
+static _Bool __isExynos9810(void) {
+  char arch[PROP_VALUE_MAX];
+  return __system_property_get("ro.arch", arch) > 0 &&
+         strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0;
+}
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
+  if (result) {
+    // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
+    // only the former support LSE atomics. However, the kernel in the
+    // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
+    // reported the feature as being supported.
+    //
+    // The kernel appears to have been corrected to mark it unsupported as of
+    // the Android 9.0 release on those devices, and this issue has not been
+    // observed anywhere else. Thus, this workaround may be removed if
+    // compiler-rt ever drops support for Android 8.0.
+    if (__isExynos9810())
+      result = false;
+  }
+  __aarch64_have_lse_atomics = result;
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc
new file mode 100644
index 00000000000000..4a1f9c2c27c828
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc
@@ -0,0 +1,5 @@
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  unsigned long hwcap;
+  int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+  __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc
new file mode 100644
index 00000000000000..91eac70ae6c5e9
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc
@@ -0,0 +1,12 @@
+#include <zircon/features.h>
+#include <zircon/syscalls.h>
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  // This ensures the vDSO is a direct link-time dependency of anything that
+  // needs this initializer code.
+#pragma comment(lib, "zircon")
+  uint32_t features;
+  zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
+  __aarch64_have_lse_atomics =
+      status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc
new file mode 100644
index 00000000000000..6642c1f5b60b39
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc
@@ -0,0 +1,6 @@
+#include <sys/auxv.h>
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
new file mode 100644
index 00000000000000..924ca89cf60f5c
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -0,0 +1,41 @@
+//===-- cpu_model.h - Utilities for cpu model detection -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common utilities for runtime cpu model detection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
+#define COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
+
+#define bool int
+#define true 1
+#define false 0
+
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
+#if __has_attribute(constructor)
+#if __GNUC__ >= 9
+// Ordinarily init priorities below 101 are disallowed as they are reserved for
+// the implementation.
However, we are the implementation, so silence the +// diagnostic, since it doesn't apply to us. +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +// We're choosing init priority 90 to force our constructors to run before any +// constructors in the end user application (starting at priority 101). This +// value matches the libgcc choice for the same functions. +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + +#endif diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model/x86.c similarity index 57% rename from compiler-rt/lib/builtins/cpu_model.c rename to compiler-rt/lib/builtins/cpu_model/x86.c index b0ec5e51e96d49..72b0d55d65f0f7 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -1,4 +1,4 @@ -//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// +//===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,42 +8,21 @@ // // This file is based on LLVM's lib/Support/Host.cpp. // It implements the operating system Host concept and builtin -// __cpu_model for the compiler_rt library for x86 and -// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64. +// __cpu_model for the compiler_rt library for x86. // //===----------------------------------------------------------------------===// -#ifndef __has_attribute -#define __has_attribute(attr) 0 -#endif +#include "cpu_model.h" -#if __has_attribute(constructor) -#if __GNUC__ >= 9 -// Ordinarily init priorities below 101 are disallowed as they are reserved for the -// implementation. 
However, we are the implementation, so silence the diagnostic, -// since it doesn't apply to us. -#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" -#endif -// We're choosing init priority 90 to force our constructors to run before any -// constructors in the end user application (starting at priority 101). This value -// matches the libgcc choice for the same functions. -#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) -#else -// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that -// this runs during initialization. -#define CONSTRUCTOR_ATTRIBUTE +#if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64)) +#error This file is intended only for x86-based targets #endif -#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64)) && \ - (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) +#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #include -#define bool int -#define true 1 -#define false 0 - #ifdef _MSC_VER #include #endif @@ -319,12 +298,12 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static const char * -getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { -#define testFeature(F) \ - (Features[F / 32] & (1 << (F % 32))) != 0 +static const char *getIntelProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { +#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 // We select CPU strings to match the code in Host.cpp, but we don't use them // in compiler-rt. @@ -357,7 +336,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. 
case 0x1f: - case 0x2e: // Nehalem EX + case 0x2e: // Nehalem EX CPU = "nehalem"; *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; @@ -378,7 +357,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, *Subtype = INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: - case 0x3e: // Ivy Bridge EP + case 0x3e: // Ivy Bridge EP CPU = "ivybridge"; *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_IVYBRIDGE; @@ -405,12 +384,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; // Skylake: - case 0x4e: // Skylake mobile - case 0x5e: // Skylake desktop - case 0x8e: // Kaby Lake mobile - case 0x9e: // Kaby Lake desktop - case 0xa5: // Comet Lake-H/S - case 0xa6: // Comet Lake-U + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + case 0xa5: // Comet Lake-H/S + case 0xa6: // Comet Lake-U CPU = "skylake"; *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_SKYLAKE; @@ -609,10 +588,11 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, return CPU; } -static const char * -getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { +static const char *getAMDProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { // We select CPU strings to match the code in Host.cpp, but we don't use them // in compiler-rt. 
const char *CPU = 0; @@ -689,10 +669,8 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, *Subtype = AMDFAM19H_ZNVER3; break; } - if ((Model >= 0x10 && Model <= 0x1f) || - (Model >= 0x60 && Model <= 0x74) || - (Model >= 0x78 && Model <= 0x7b) || - (Model >= 0xA0 && Model <= 0xAf)) { + if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x74) || + (Model >= 0x78 && Model <= 0x7b) || (Model >= 0xA0 && Model <= 0xAf)) { CPU = "znver4"; *Subtype = AMDFAM19H_ZNVER4; break; // "znver4" @@ -710,8 +688,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned EAX = 0, EBX = 0; #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1) -#define setFeature(F) \ - Features[F / 32] |= 1U << (F % 32) +#define setFeature(F) Features[F / 32] |= 1U << (F % 32) if ((EDX >> 15) & 1) setFeature(FEATURE_CMOV); @@ -938,567 +915,4 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { return 0; } -#elif defined(__aarch64__) - -// LSE support detection for out-of-line atomics -// using HWCAP and Auxiliary vector -_Bool __aarch64_have_lse_atomics - __attribute__((visibility("hidden"), nocommon)); - -#if defined(__has_include) -#if __has_include() -#include - -#if __has_include() -#include -#else -typedef struct __ifunc_arg_t { - unsigned long _size; - unsigned long _hwcap; - unsigned long _hwcap2; -} __ifunc_arg_t; -#endif // __has_include() - -#if __has_include() -#include - -#if defined(__ANDROID__) -#include -#include -#elif defined(__Fuchsia__) -#include -#include -#endif - -#ifndef _IFUNC_ARG_HWCAP -#define _IFUNC_ARG_HWCAP (1ULL << 62) -#endif -#ifndef AT_HWCAP -#define AT_HWCAP 16 -#endif -#ifndef HWCAP_CPUID -#define HWCAP_CPUID (1 << 11) -#endif -#ifndef HWCAP_FP -#define HWCAP_FP (1 << 0) -#endif -#ifndef HWCAP_ASIMD -#define HWCAP_ASIMD (1 << 1) -#endif -#ifndef HWCAP_AES -#define HWCAP_AES (1 << 3) -#endif -#ifndef HWCAP_PMULL -#define HWCAP_PMULL (1 << 4) -#endif -#ifndef HWCAP_SHA1 -#define HWCAP_SHA1 (1 << 
5) -#endif -#ifndef HWCAP_SHA2 -#define HWCAP_SHA2 (1 << 6) -#endif -#ifndef HWCAP_ATOMICS -#define HWCAP_ATOMICS (1 << 8) -#endif -#ifndef HWCAP_FPHP -#define HWCAP_FPHP (1 << 9) -#endif -#ifndef HWCAP_ASIMDHP -#define HWCAP_ASIMDHP (1 << 10) -#endif -#ifndef HWCAP_ASIMDRDM -#define HWCAP_ASIMDRDM (1 << 12) -#endif -#ifndef HWCAP_JSCVT -#define HWCAP_JSCVT (1 << 13) -#endif -#ifndef HWCAP_FCMA -#define HWCAP_FCMA (1 << 14) -#endif -#ifndef HWCAP_LRCPC -#define HWCAP_LRCPC (1 << 15) -#endif -#ifndef HWCAP_DCPOP -#define HWCAP_DCPOP (1 << 16) -#endif -#ifndef HWCAP_SHA3 -#define HWCAP_SHA3 (1 << 17) -#endif -#ifndef HWCAP_SM3 -#define HWCAP_SM3 (1 << 18) -#endif -#ifndef HWCAP_SM4 -#define HWCAP_SM4 (1 << 19) -#endif -#ifndef HWCAP_ASIMDDP -#define HWCAP_ASIMDDP (1 << 20) -#endif -#ifndef HWCAP_SHA512 -#define HWCAP_SHA512 (1 << 21) -#endif -#ifndef HWCAP_SVE -#define HWCAP_SVE (1 << 22) -#endif -#ifndef HWCAP_ASIMDFHM -#define HWCAP_ASIMDFHM (1 << 23) -#endif -#ifndef HWCAP_DIT -#define HWCAP_DIT (1 << 24) -#endif -#ifndef HWCAP_ILRCPC -#define HWCAP_ILRCPC (1 << 26) -#endif -#ifndef HWCAP_FLAGM -#define HWCAP_FLAGM (1 << 27) -#endif -#ifndef HWCAP_SSBS -#define HWCAP_SSBS (1 << 28) -#endif -#ifndef HWCAP_SB -#define HWCAP_SB (1 << 29) -#endif - -#ifndef AT_HWCAP2 -#define AT_HWCAP2 26 -#endif -#ifndef HWCAP2_DCPODP -#define HWCAP2_DCPODP (1 << 0) -#endif -#ifndef HWCAP2_SVE2 -#define HWCAP2_SVE2 (1 << 1) -#endif -#ifndef HWCAP2_SVEAES -#define HWCAP2_SVEAES (1 << 2) -#endif -#ifndef HWCAP2_SVEPMULL -#define HWCAP2_SVEPMULL (1 << 3) -#endif -#ifndef HWCAP2_SVEBITPERM -#define HWCAP2_SVEBITPERM (1 << 4) -#endif -#ifndef HWCAP2_SVESHA3 -#define HWCAP2_SVESHA3 (1 << 5) -#endif -#ifndef HWCAP2_SVESM4 -#define HWCAP2_SVESM4 (1 << 6) -#endif -#ifndef HWCAP2_FLAGM2 -#define HWCAP2_FLAGM2 (1 << 7) -#endif -#ifndef HWCAP2_FRINT -#define HWCAP2_FRINT (1 << 8) -#endif -#ifndef HWCAP2_SVEI8MM -#define HWCAP2_SVEI8MM (1 << 9) -#endif -#ifndef HWCAP2_SVEF32MM -#define 
HWCAP2_SVEF32MM (1 << 10) -#endif -#ifndef HWCAP2_SVEF64MM -#define HWCAP2_SVEF64MM (1 << 11) -#endif -#ifndef HWCAP2_SVEBF16 -#define HWCAP2_SVEBF16 (1 << 12) -#endif -#ifndef HWCAP2_I8MM -#define HWCAP2_I8MM (1 << 13) -#endif -#ifndef HWCAP2_BF16 -#define HWCAP2_BF16 (1 << 14) -#endif -#ifndef HWCAP2_DGH -#define HWCAP2_DGH (1 << 15) -#endif -#ifndef HWCAP2_RNG -#define HWCAP2_RNG (1 << 16) -#endif -#ifndef HWCAP2_BTI -#define HWCAP2_BTI (1 << 17) -#endif -#ifndef HWCAP2_MTE -#define HWCAP2_MTE (1 << 18) -#endif -#ifndef HWCAP2_RPRES -#define HWCAP2_RPRES (1 << 21) -#endif -#ifndef HWCAP2_MTE3 -#define HWCAP2_MTE3 (1 << 22) -#endif -#ifndef HWCAP2_SME -#define HWCAP2_SME (1 << 23) -#endif -#ifndef HWCAP2_SME_I16I64 -#define HWCAP2_SME_I16I64 (1 << 24) -#endif -#ifndef HWCAP2_SME_F64F64 -#define HWCAP2_SME_F64F64 (1 << 25) -#endif -#ifndef HWCAP2_WFXT -#define HWCAP2_WFXT (1UL << 31) -#endif -#ifndef HWCAP2_EBF16 -#define HWCAP2_EBF16 (1ULL << 32) -#endif -#ifndef HWCAP2_SVE_EBF16 -#define HWCAP2_SVE_EBF16 (1ULL << 33) -#endif - -// Detect Exynos 9810 CPU -#define IF_EXYNOS9810 \ - char arch[PROP_VALUE_MAX]; \ - if (__system_property_get("ro.arch", arch) > 0 && \ - strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) - -static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) { -#if defined(__FreeBSD__) - unsigned long hwcap; - int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); - __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0; -#elif defined(__Fuchsia__) - // This ensures the vDSO is a direct link-time dependency of anything that - // needs this initializer code. 
-#pragma comment(lib, "zircon") - uint32_t features; - zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); - __aarch64_have_lse_atomics = - status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0; -#else - unsigned long hwcap = getauxval(AT_HWCAP); - _Bool result = (hwcap & HWCAP_ATOMICS) != 0; -#if defined(__ANDROID__) - if (result) { - // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; - // only the former support LSE atomics. However, the kernel in the - // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly - // reported the feature as being supported. - // - // The kernel appears to have been corrected to mark it unsupported as of - // the Android 9.0 release on those devices, and this issue has not been - // observed anywhere else. Thus, this workaround may be removed if - // compiler-rt ever drops support for Android 8.0. - IF_EXYNOS9810 result = false; - } -#endif // defined(__ANDROID__) - __aarch64_have_lse_atomics = result; -#endif // defined(__FreeBSD__) -} - -#if !defined(DISABLE_AARCH64_FMV) -// CPUFeatures must correspond to the same AArch64 features in -// AArch64TargetParser.h -enum CPUFeatures { - FEAT_RNG, - FEAT_FLAGM, - FEAT_FLAGM2, - FEAT_FP16FML, - FEAT_DOTPROD, - FEAT_SM4, - FEAT_RDM, - FEAT_LSE, - FEAT_FP, - FEAT_SIMD, - FEAT_CRC, - FEAT_SHA1, - FEAT_SHA2, - FEAT_SHA3, - FEAT_AES, - FEAT_PMULL, - FEAT_FP16, - FEAT_DIT, - FEAT_DPB, - FEAT_DPB2, - FEAT_JSCVT, - FEAT_FCMA, - FEAT_RCPC, - FEAT_RCPC2, - FEAT_FRINTTS, - FEAT_DGH, - FEAT_I8MM, - FEAT_BF16, - FEAT_EBF16, - FEAT_RPRES, - FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, - FEAT_SVE_F32MM, - FEAT_SVE_F64MM, - FEAT_SVE2, - FEAT_SVE_AES, - FEAT_SVE_PMULL128, - FEAT_SVE_BITPERM, - FEAT_SVE_SHA3, - FEAT_SVE_SM4, - FEAT_SME, - FEAT_MEMTAG, - FEAT_MEMTAG2, - FEAT_MEMTAG3, - FEAT_SB, - FEAT_PREDRES, - FEAT_SSBS, - FEAT_SSBS2, - FEAT_BTI, - FEAT_LS64, - FEAT_LS64_V, - FEAT_LS64_ACCDATA, - FEAT_WFXT, - 
FEAT_SME_F64, - FEAT_SME_I64, - FEAT_SME2, - FEAT_RCPC3, - FEAT_MAX, - FEAT_EXT = 62, // Reserved to indicate presence of additional features field - // in __aarch64_cpu_features - FEAT_INIT // Used as flag of features initialization completion -}; - -// Architecture features used -// in Function Multi Versioning -struct { - unsigned long long features; - // As features grows new fields could be added -} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon)); - -static void __init_cpu_features_constructor(unsigned long hwcap, - const __ifunc_arg_t *arg) { -#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F -#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) -#define extractBits(val, start, number) \ - (val & ((1ULL << number) - 1ULL) << start) >> start - unsigned long hwcap2 = 0; - if (hwcap & _IFUNC_ARG_HWCAP) - hwcap2 = arg->_hwcap2; - if (hwcap & HWCAP_CRC32) - setCPUFeature(FEAT_CRC); - if (hwcap & HWCAP_PMULL) - setCPUFeature(FEAT_PMULL); - if (hwcap & HWCAP_FLAGM) - setCPUFeature(FEAT_FLAGM); - if (hwcap2 & HWCAP2_FLAGM2) { - setCPUFeature(FEAT_FLAGM); - setCPUFeature(FEAT_FLAGM2); - } - if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) - setCPUFeature(FEAT_SM4); - if (hwcap & HWCAP_ASIMDDP) - setCPUFeature(FEAT_DOTPROD); - if (hwcap & HWCAP_ASIMDFHM) - setCPUFeature(FEAT_FP16FML); - if (hwcap & HWCAP_FPHP) { - setCPUFeature(FEAT_FP16); - setCPUFeature(FEAT_FP); - } - if (hwcap & HWCAP_DIT) - setCPUFeature(FEAT_DIT); - if (hwcap & HWCAP_ASIMDRDM) - setCPUFeature(FEAT_RDM); - if (hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC2); - if (hwcap & HWCAP_AES) - setCPUFeature(FEAT_AES); - if (hwcap & HWCAP_SHA1) - setCPUFeature(FEAT_SHA1); - if (hwcap & HWCAP_SHA2) - setCPUFeature(FEAT_SHA2); - if (hwcap & HWCAP_JSCVT) - setCPUFeature(FEAT_JSCVT); - if (hwcap & HWCAP_FCMA) - setCPUFeature(FEAT_FCMA); - if (hwcap & HWCAP_SB) - setCPUFeature(FEAT_SB); - if (hwcap & HWCAP_SSBS) - setCPUFeature(FEAT_SSBS2); - if (hwcap2 & 
HWCAP2_MTE) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); - } - if (hwcap2 & HWCAP2_MTE3) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); - setCPUFeature(FEAT_MEMTAG3); - } - if (hwcap2 & HWCAP2_SVEAES) - setCPUFeature(FEAT_SVE_AES); - if (hwcap2 & HWCAP2_SVEPMULL) { - setCPUFeature(FEAT_SVE_AES); - setCPUFeature(FEAT_SVE_PMULL128); - } - if (hwcap2 & HWCAP2_SVEBITPERM) - setCPUFeature(FEAT_SVE_BITPERM); - if (hwcap2 & HWCAP2_SVESHA3) - setCPUFeature(FEAT_SVE_SHA3); - if (hwcap2 & HWCAP2_SVESM4) - setCPUFeature(FEAT_SVE_SM4); - if (hwcap2 & HWCAP2_DCPODP) - setCPUFeature(FEAT_DPB2); - if (hwcap & HWCAP_ATOMICS) - setCPUFeature(FEAT_LSE); - if (hwcap2 & HWCAP2_RNG) - setCPUFeature(FEAT_RNG); - if (hwcap2 & HWCAP2_I8MM) - setCPUFeature(FEAT_I8MM); - if (hwcap2 & HWCAP2_EBF16) - setCPUFeature(FEAT_EBF16); - if (hwcap2 & HWCAP2_SVE_EBF16) - setCPUFeature(FEAT_SVE_EBF16); - if (hwcap2 & HWCAP2_DGH) - setCPUFeature(FEAT_DGH); - if (hwcap2 & HWCAP2_FRINT) - setCPUFeature(FEAT_FRINTTS); - if (hwcap2 & HWCAP2_SVEI8MM) - setCPUFeature(FEAT_SVE_I8MM); - if (hwcap2 & HWCAP2_SVEF32MM) - setCPUFeature(FEAT_SVE_F32MM); - if (hwcap2 & HWCAP2_SVEF64MM) - setCPUFeature(FEAT_SVE_F64MM); - if (hwcap2 & HWCAP2_BTI) - setCPUFeature(FEAT_BTI); - if (hwcap2 & HWCAP2_RPRES) - setCPUFeature(FEAT_RPRES); - if (hwcap2 & HWCAP2_WFXT) - setCPUFeature(FEAT_WFXT); - if (hwcap2 & HWCAP2_SME) - setCPUFeature(FEAT_SME); - if (hwcap2 & HWCAP2_SME_I16I64) - setCPUFeature(FEAT_SME_I64); - if (hwcap2 & HWCAP2_SME_F64F64) - setCPUFeature(FEAT_SME_F64); - if (hwcap & HWCAP_CPUID) { - unsigned long ftr; - getCPUFeature(ID_AA64PFR1_EL1, ftr); - // ID_AA64PFR1_EL1.MTE >= 0b0001 - if (extractBits(ftr, 8, 4) >= 0x1) - setCPUFeature(FEAT_MEMTAG); - // ID_AA64PFR1_EL1.SSBS == 0b0001 - if (extractBits(ftr, 4, 4) == 0x1) - setCPUFeature(FEAT_SSBS); - // ID_AA64PFR1_EL1.SME == 0b0010 - if (extractBits(ftr, 24, 4) == 0x2) - setCPUFeature(FEAT_SME2); - getCPUFeature(ID_AA64PFR0_EL1, 
ftr); - // ID_AA64PFR0_EL1.FP != 0b1111 - if (extractBits(ftr, 16, 4) != 0xF) { - setCPUFeature(FEAT_FP); - // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP - setCPUFeature(FEAT_SIMD); - } - // ID_AA64PFR0_EL1.SVE != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) { - // get ID_AA64ZFR0_EL1, that name supported - // if sve enabled only - getCPUFeature(S3_0_C0_C4_4, ftr); - // ID_AA64ZFR0_EL1.SVEver == 0b0000 - if (extractBits(ftr, 0, 4) == 0x0) - setCPUFeature(FEAT_SVE); - // ID_AA64ZFR0_EL1.SVEver == 0b0001 - if (extractBits(ftr, 0, 4) == 0x1) - setCPUFeature(FEAT_SVE2); - // ID_AA64ZFR0_EL1.BF16 != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_SVE_BF16); - } - getCPUFeature(ID_AA64ISAR0_EL1, ftr); - // ID_AA64ISAR0_EL1.SHA3 != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) - setCPUFeature(FEAT_SHA3); - getCPUFeature(ID_AA64ISAR1_EL1, ftr); - // ID_AA64ISAR1_EL1.DPB >= 0b0001 - if (extractBits(ftr, 0, 4) >= 0x1) - setCPUFeature(FEAT_DPB); - // ID_AA64ISAR1_EL1.LRCPC != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_RCPC); - // ID_AA64ISAR1_EL1.LRCPC == 0b0011 - if (extractBits(ftr, 20, 4) == 0x3) - setCPUFeature(FEAT_RCPC3); - // ID_AA64ISAR1_EL1.SPECRES == 0b0001 - if (extractBits(ftr, 40, 4) == 0x2) - setCPUFeature(FEAT_PREDRES); - // ID_AA64ISAR1_EL1.BF16 != 0b0000 - if (extractBits(ftr, 44, 4) != 0x0) - setCPUFeature(FEAT_BF16); - // ID_AA64ISAR1_EL1.LS64 >= 0b0001 - if (extractBits(ftr, 60, 4) >= 0x1) - setCPUFeature(FEAT_LS64); - // ID_AA64ISAR1_EL1.LS64 >= 0b0010 - if (extractBits(ftr, 60, 4) >= 0x2) - setCPUFeature(FEAT_LS64_V); - // ID_AA64ISAR1_EL1.LS64 >= 0b0011 - if (extractBits(ftr, 60, 4) >= 0x3) - setCPUFeature(FEAT_LS64_ACCDATA); - } else { - // Set some features in case of no CPUID support - if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { - setCPUFeature(FEAT_FP); - // FP and AdvSIMD fields have the same value - setCPUFeature(FEAT_SIMD); - } - if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) - 
setCPUFeature(FEAT_DPB); - if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC); - if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) - setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); - if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) - setCPUFeature(FEAT_SVE2); - if (hwcap & HWCAP_SHA3) - setCPUFeature(FEAT_SHA3); - } - setCPUFeature(FEAT_INIT); -} - -void __init_cpu_features_resolver(unsigned long hwcap, - const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) - return; -#if defined(__ANDROID__) - // ifunc resolvers don't have hwcaps in arguments on Android API lower - // than 30. If so, set feature detection done and keep all CPU features - // unsupported (zeros). To detect this case in runtime we check existence - // of memfd_create function from Standard C library which was introduced in - // Android API 30. - int memfd_create(const char *, unsigned int) __attribute__((weak)); - if (!memfd_create) - return; -#endif // defined(__ANDROID__) - __init_cpu_features_constructor(hwcap, arg); -} - -void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { - unsigned long hwcap; - unsigned long hwcap2; - // CPU features already initialized. - if (__aarch64_cpu_features.features) - return; -#if defined(__FreeBSD__) - int res = 0; - res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); - res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2); - if (res) - return; -#else -#if defined(__ANDROID__) - // Don't set any CPU features, - // detection could be wrong on Exynos 9810. 
- IF_EXYNOS9810 return; -#endif // defined(__ANDROID__) - hwcap = getauxval(AT_HWCAP); - hwcap2 = getauxval(AT_HWCAP2); -#endif // defined(__FreeBSD__) - __ifunc_arg_t arg; - arg._size = sizeof(__ifunc_arg_t); - arg._hwcap = hwcap; - arg._hwcap2 = hwcap2; - __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg); -#undef extractBits -#undef getCPUFeature -#undef setCPUFeature -#undef IF_EXYNOS9810 -} -#endif // !defined(DISABLE_AARCH64_FMV) -#endif // defined(__has_include) -#endif // __has_include() -#endif // __has_include() -#endif // defined(__aarch64__) +#endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)