diff --git a/cmake/deps.txt b/cmake/deps.txt
index e1870bf2df0cf..f8e5fb7f8ede0 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -47,7 +47,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo
 psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
 pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
 pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9
-pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/877328f188a3c7d1fa855871a278eb48d530c4c0.zip;9152d4bf6b8bde9f19b116de3bd8a745097ed9df
+pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
 re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
 tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
index 1c73dcfb7f332..3c616684fb296 100644
--- a/cmake/external/onnxruntime_external_deps.cmake
+++ b/cmake/external/onnxruntime_external_deps.cmake
@@ -369,7 +369,9 @@ if (CPUINFO_SUPPORTED)
       PATCH_COMMAND
         ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch &&
         # https://github.com/pytorch/cpuinfo/pull/324
-        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
+        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch &&
+        # https://github.com/pytorch/cpuinfo/pull/348
+        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
       FIND_PACKAGE_ARGS NAMES cpuinfo
     )
   else()
diff --git a/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch b/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
new file mode 100644
index 0000000000000..44ac0f13f5466
--- /dev/null
+++ b/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
@@ -0,0 +1,19 @@
+diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
+index 5c0a5f3..a07fbe4 100644
+--- a/src/arm/windows/init.c
++++ b/src/arm/windows/init.c
+@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) {
+ 	// guarantee that, but it holds in practice.
+ 	cpuinfo_isa.rdm = dotprod;
+ 
++	// PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older
++	// Windows versions. If fp16arith was not detected with
++	// IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall
++	// back to using the value of dotprod.
++	if (!cpuinfo_isa.fp16arith) {
++		cpuinfo_isa.fp16arith = dotprod;
++	}
++
+ 	/* Windows API reports all or nothing for cryptographic instructions. */
+ 	const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
+ 	cpuinfo_isa.aes = crypto;
diff --git a/cmake/vcpkg-ports/cpuinfo/portfile.cmake b/cmake/vcpkg-ports/cpuinfo/portfile.cmake
index 80192840ee9b0..67bd18e61cc28 100644
--- a/cmake/vcpkg-ports/cpuinfo/portfile.cmake
+++ b/cmake/vcpkg-ports/cpuinfo/portfile.cmake
@@ -6,12 +6,13 @@ endif()
 vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO pytorch/cpuinfo
-    REF 877328f188a3c7d1fa855871a278eb48d530c4c0
-    SHA512 b6d5a9ce9996eee3b2f09f39115f7ae178fe4d4814cc35b049a59d04a82228e268aa52d073c307ccb56a427428622940e1c77f004c99851dfca0d3a5d803658b
+    REF 403d652dca4c1046e8145950b1c0997a9f748b57
+    SHA512 f7cd6dc44bd1120af610cae1337ed4c0f557ba78d2de9c73fed350fa3dfe9512643a1619ae55f5a540c6316a87d641856cca27297bb8766e48f39b7b7a59da1f
     HEAD_REF master
     PATCHES
         patch_cpuinfo_h_for_arm64ec.patch
-        patch_vcpkg_arm64ec_support.patch  # https://github.com/pytorch/cpuinfo/pull/324
+        patch_vcpkg_arm64ec_support.patch       # https://github.com/pytorch/cpuinfo/pull/324
+        win_arm_fp16_detection_fallback.patch   # https://github.com/pytorch/cpuinfo/pull/348
 )
 
 vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
diff --git a/cmake/vcpkg-ports/cpuinfo/vcpkg.json b/cmake/vcpkg-ports/cpuinfo/vcpkg.json
index f1ccda72679b1..76486eceecf12 100644
--- a/cmake/vcpkg-ports/cpuinfo/vcpkg.json
+++ b/cmake/vcpkg-ports/cpuinfo/vcpkg.json
@@ -1,7 +1,7 @@
 {
   "name": "cpuinfo",
-  "version-date": "2025-10-23",
-  "port-version": 4,
+  "version-date": "2025-11-18",
+  "port-version": 5,
   "description": "CPU INFOrmation library (x86/x86-64/ARM/ARM64, Linux/Windows/Android/macOS/iOS)",
   "homepage": "https://github.com/pytorch/cpuinfo",
   "license": "BSD-2-Clause",
diff --git a/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch b/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch
new file mode 100644
index 0000000000000..44ac0f13f5466
--- /dev/null
+++ b/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch
@@ -0,0 +1,19 @@
+diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
+index 5c0a5f3..a07fbe4 100644
+--- a/src/arm/windows/init.c
++++ b/src/arm/windows/init.c
+@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) {
+ 	// guarantee that, but it holds in practice.
+ 	cpuinfo_isa.rdm = dotprod;
+ 
++	// PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older
++	// Windows versions. If fp16arith was not detected with
++	// IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall
++	// back to using the value of dotprod.
++	if (!cpuinfo_isa.fp16arith) {
++		cpuinfo_isa.fp16arith = dotprod;
++	}
++
+ 	/* Windows API reports all or nothing for cryptographic instructions. */
+ 	const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
+ 	cpuinfo_isa.aes = crypto;
diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc
index ab8ab0b326292..afea9f62419fa 100644
--- a/onnxruntime/core/common/cpuid_info.cc
+++ b/onnxruntime/core/common/cpuid_info.cc
@@ -237,9 +237,9 @@ void CPUIDInfo::ArmLinuxInit() {
 #elif defined(_WIN32)  // ^ defined(__linux__)
 
 void CPUIDInfo::ArmWindowsInit() {
-  // Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry
+  // Read MIDR register values from the Windows registry
   // There should be one per CPU
-  std::vector<uint64_t> midr_values{}, id_aa64isar1_el1_values{};
+  std::vector<uint64_t> midr_values{};
 
   // TODO!! Don't support multiple processor group yet!!
   constexpr int MAX_CORES = 64;
@@ -272,17 +272,7 @@ void CPUIDInfo::ArmWindowsInit() {
       break;
     }
 
-    uint64_t id_aa64isar1_el1_value;
-    data_size = sizeof(id_aa64isar1_el1_value);
-
-    // CP 4031 corresponds to ID_AA64ISAR1_EL1 register
-    if (::RegGetValueA(HKEY_LOCAL_MACHINE, processor_subkey, "CP 4031", RRF_RT_REG_QWORD,
-                       nullptr, &id_aa64isar1_el1_value, &data_size) != ERROR_SUCCESS) {
-      break;
-    }
-
     midr_values.push_back(midr_value);
-    id_aa64isar1_el1_values.push_back(id_aa64isar1_el1_value);
   }
 
   // process midr_values
@@ -308,22 +298,15 @@ void CPUIDInfo::ArmWindowsInit() {
     }
   }
 
-  has_arm_neon_i8mm_ = std::all_of(
-      id_aa64isar1_el1_values.begin(), id_aa64isar1_el1_values.end(),
-      [](uint64_t id_aa64isar1_el1_value) {
-        // I8MM, bits [55:52]
-        return ((id_aa64isar1_el1_value >> 52) & 0xF) != 0;
-      });
-
-  has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
-
 #if defined(CPUINFO_SUPPORTED)
   if (pytorch_cpuinfo_init_) {
+    has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
     has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
-    // cpuinfo_has_arm_i8mm() doesn't work on Windows yet. See https://github.com/pytorch/cpuinfo/issues/279.
-    // has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
-    has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && has_arm_neon_i8mm_;
+    has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
+    has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
     has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
+    has_arm_sme_ = cpuinfo_has_arm_sme();
+    has_arm_sme2_ = cpuinfo_has_arm_sme2();
   }
 #endif  // defined(CPUINFO_SUPPORTED)
 }
@@ -397,4 +380,4 @@ CPUIDInfo::CPUIDInfo() {
 #endif
 #endif  // defined(CPUIDINFO_ARCH_ARM)
 }
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h
index 9c40627b5cd1b..ca9315c7ef95d 100644
--- a/onnxruntime/core/common/cpuid_info.h
+++ b/onnxruntime/core/common/cpuid_info.h
@@ -171,4 +171,4 @@ class CPUIDInfo {
   uint32_t vendor_id_;
 };
 
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/mlas/lib/qgemm.cpp b/onnxruntime/core/mlas/lib/qgemm.cpp
index a1c2e467188f7..4c675f104c52b 100644
--- a/onnxruntime/core/mlas/lib/qgemm.cpp
+++ b/onnxruntime/core/mlas/lib/qgemm.cpp
@@ -407,7 +407,7 @@ Return Value:
         ~(BufferAlignment - 1);
     // If this gemm B argument is used in a dynamically quantization gemm operation we can optimize for
     // this use case. Concat both packed representations for later decision. This allows for cases later
-    // where we still have the prepack at the cost of some memory otherwise we can use the qgemm quantization 
+    // where we still have the prepack at the cost of some memory; otherwise we can use the qgemm quantization
     // for better performance
     return AlignedBytesRequired + MlasDynamicQgemmPackBSize(N, K);
 }