Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/877328f188a3c7d1fa855871a278eb48d530c4c0.zip;9152d4bf6b8bde9f19b116de3bd8a745097ed9df
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
Expand Down
4 changes: 3 additions & 1 deletion cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,9 @@ if (CPUINFO_SUPPORTED)
PATCH_COMMAND
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch &&
# https://github.com/pytorch/cpuinfo/pull/324
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch &&
# https://github.com/pytorch/cpuinfo/pull/348
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
FIND_PACKAGE_ARGS NAMES cpuinfo
)
else()
Expand Down
19 changes: 19 additions & 0 deletions cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
index 5c0a5f3..a07fbe4 100644
--- a/src/arm/windows/init.c
+++ b/src/arm/windows/init.c
@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) {
// guarantee that, but it holds in practice.
cpuinfo_isa.rdm = dotprod;

+ // PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older
+ // Windows versions. If fp16arith was not detected with
+ // IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall
+ // back to using the value of dotprod.
+ if (!cpuinfo_isa.fp16arith) {
+ cpuinfo_isa.fp16arith = dotprod;
+ }
+
/* Windows API reports all or nothing for cryptographic instructions. */
const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
cpuinfo_isa.aes = crypto;
7 changes: 4 additions & 3 deletions cmake/vcpkg-ports/cpuinfo/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ endif()
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO pytorch/cpuinfo
REF 877328f188a3c7d1fa855871a278eb48d530c4c0
SHA512 b6d5a9ce9996eee3b2f09f39115f7ae178fe4d4814cc35b049a59d04a82228e268aa52d073c307ccb56a427428622940e1c77f004c99851dfca0d3a5d803658b
REF 403d652dca4c1046e8145950b1c0997a9f748b57
SHA512 f7cd6dc44bd1120af610cae1337ed4c0f557ba78d2de9c73fed350fa3dfe9512643a1619ae55f5a540c6316a87d641856cca27297bb8766e48f39b7b7a59da1f
HEAD_REF master
PATCHES
patch_cpuinfo_h_for_arm64ec.patch
patch_vcpkg_arm64ec_support.patch # https://github.com/pytorch/cpuinfo/pull/324
patch_vcpkg_arm64ec_support.patch # https://github.com/pytorch/cpuinfo/pull/324
win_arm_fp16_detection_fallback.patch # https://github.com/pytorch/cpuinfo/pull/348
)

vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
Expand Down
4 changes: 2 additions & 2 deletions cmake/vcpkg-ports/cpuinfo/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "cpuinfo",
"version-date": "2025-10-23",
"port-version": 4,
"version-date": "2025-11-18",
"port-version": 5,
"description": "CPU INFOrmation library (x86/x86-64/ARM/ARM64, Linux/Windows/Android/macOS/iOS)",
"homepage": "https://github.com/pytorch/cpuinfo",
"license": "BSD-2-Clause",
Expand Down
19 changes: 19 additions & 0 deletions cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
index 5c0a5f3..a07fbe4 100644
--- a/src/arm/windows/init.c
+++ b/src/arm/windows/init.c
@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) {
// guarantee that, but it holds in practice.
cpuinfo_isa.rdm = dotprod;

+ // PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older
+ // Windows versions. If fp16arith was not detected with
+ // IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall
+ // back to using the value of dotprod.
+ if (!cpuinfo_isa.fp16arith) {
+ cpuinfo_isa.fp16arith = dotprod;
+ }
+
/* Windows API reports all or nothing for cryptographic instructions. */
const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
cpuinfo_isa.aes = crypto;
33 changes: 8 additions & 25 deletions onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,9 @@
#elif defined(_WIN32) // ^ defined(__linux__)

void CPUIDInfo::ArmWindowsInit() {
// Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry
// Read MIDR register values from Windows registry
// There should be one per CPU
std::vector<uint64_t> midr_values{}, id_aa64isar1_el1_values{};
std::vector<uint64_t> midr_values{};

Check warning on line 242 in onnxruntime/core/common/cpuid_info.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <vector> for vector<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/common/cpuid_info.cc:242: Add #include <vector> for vector<> [build/include_what_you_use] [4]

// TODO!! Don't support multiple processor groups yet!!
constexpr int MAX_CORES = 64;
Expand Down Expand Up @@ -272,17 +272,7 @@
break;
}

uint64_t id_aa64isar1_el1_value;
data_size = sizeof(id_aa64isar1_el1_value);

// CP 4031 corresponds to ID_AA64ISAR1_EL1 register
if (::RegGetValueA(HKEY_LOCAL_MACHINE, processor_subkey, "CP 4031", RRF_RT_REG_QWORD,
nullptr, &id_aa64isar1_el1_value, &data_size) != ERROR_SUCCESS) {
break;
}

midr_values.push_back(midr_value);
id_aa64isar1_el1_values.push_back(id_aa64isar1_el1_value);
}

// process midr_values
Expand All @@ -308,22 +298,15 @@
}
}

has_arm_neon_i8mm_ = std::all_of(
id_aa64isar1_el1_values.begin(), id_aa64isar1_el1_values.end(),
[](uint64_t id_aa64isar1_el1_value) {
// I8MM, bits [55:52]
return ((id_aa64isar1_el1_value >> 52) & 0xF) != 0;
});

has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);

#if defined(CPUINFO_SUPPORTED)
if (pytorch_cpuinfo_init_) {
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
// cpuinfo_has_arm_i8mm() doesn't work on Windows yet. See https://github.com/pytorch/cpuinfo/issues/279.
// has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && has_arm_neon_i8mm_;
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
has_arm_sme_ = cpuinfo_has_arm_sme();
has_arm_sme2_ = cpuinfo_has_arm_sme2();
}
#endif // defined(CPUINFO_SUPPORTED)
}
Expand Down Expand Up @@ -397,4 +380,4 @@
#endif
#endif // defined(CPUIDINFO_ARCH_ARM)
}
} // namespace onnxruntime
} // namespace onnxruntime
2 changes: 1 addition & 1 deletion onnxruntime/core/common/cpuid_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@ class CPUIDInfo {
uint32_t vendor_id_;
};

} // namespace onnxruntime
} // namespace onnxruntime
2 changes: 1 addition & 1 deletion onnxruntime/core/mlas/lib/qgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ Return Value:
~(BufferAlignment - 1);
// If this gemm B argument is used in a dynamic quantization gemm operation we can optimize for
// this use case. Concat both packed representations for later decision. This allows for cases later
// where we still have the prepack at the cost of some memory otherwise we can use the qgemm quantization
// where we still have the prepack at the cost of some memory otherwise we can use the qgemm quantization
// for better performance
return AlignedBytesRequired + MlasDynamicQgemmPackBSize(N, K);
}
Expand Down
Loading