From 044ea1d9740de4d49c1ce4b1be23441483b24e6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Hor=C3=A1k?=
Date: Wed, 17 Sep 2025 13:26:54 +0000
Subject: [PATCH] cpu: ppc64: enable build without MMA

Currently some accelerated code requires the MMA engine to be enabled in
the compiler, which limits the supported hardware to Power10+. Add a
configure check and omit the problematic code when building for older
CPUs.
---
Reviewer note: the MMA probe uses CheckCXXSourceCompiles rather than
CheckSourceCompiles(C ...). Everything gated by DNNL_PPC64_HAS_MMA is C++,
so the probe now runs with the C++ compiler and its flags, and it no longer
depends on a module that only exists in CMake 3.19 and newer. The probe
fails (and the ppc64 subdirectory is skipped) unless the compiler predefines
__MMA__. The blob hashes on the "index" line below still describe the
original revision of this patch.

 src/cpu/CMakeLists.txt                        | 27 ++++++++++++++-----
 .../reorder/cpu_reorder_regular_f32_u8.cpp    |  2 ++
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt
index 10504d1a341..222e6348970 100644
--- a/src/cpu/CMakeLists.txt
+++ b/src/cpu/CMakeLists.txt
@@ -40,12 +40,23 @@ if((DNNL_TARGET_ARCH STREQUAL "X64") OR (DNNL_TARGET_ARCH STREQUAL "AARCH64"))
 endif()
 
 if(DNNL_TARGET_ARCH STREQUAL "PPC64")
-    file(GLOB FILES_REQUIRED_OPT
-        ${CMAKE_CURRENT_SOURCE_DIR}/gemm/*.[ch]pp
-        )
-    if(NOT UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
-        set_source_files_properties(${FILES_REQUIRED_OPT}
-            PROPERTIES COMPILE_FLAGS "-O3 -funroll-loops")
+    include(CheckCXXSourceCompiles)
+
+    check_cxx_source_compiles([[
+        #if !defined(__MMA__)
+        # error "MMA not available"
+        #endif
+        int main(void) { return 0; }
+    ]] DNNL_PPC64_HAS_MMA)
+    if(DNNL_PPC64_HAS_MMA)
+        add_definitions_with_host_compiler(-DDNNL_PPC64_HAS_MMA=1)
+        file(GLOB FILES_REQUIRED_OPT
+            ${CMAKE_CURRENT_SOURCE_DIR}/gemm/*.[ch]pp
+            )
+        if(NOT UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
+            set_source_files_properties(${FILES_REQUIRED_OPT}
+                PROPERTIES COMPILE_FLAGS "-O3 -funroll-loops")
+        endif()
     endif()
 endif()
 
@@ -139,7 +150,9 @@ if (DNNL_TARGET_ARCH STREQUAL "AARCH64")
     add_subdirectory(aarch64)
 endif()
 if (DNNL_TARGET_ARCH STREQUAL "PPC64")
-    add_subdirectory(ppc64)
+    if(DNNL_PPC64_HAS_MMA)
+        add_subdirectory(ppc64)
+    endif()
 endif()
 if (DNNL_TARGET_ARCH STREQUAL "S390X")
     add_subdirectory(s390x)
diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp 
b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp index 264168500a6..905ff09d3b7 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp @@ -36,7 +36,9 @@ const impl_list_map_t &regular_f32_u8_impl_list_map() { DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) +#ifdef DNNL_PPC64_HAS_MMA DNNL_PPC64_ONLY(CPU_REORDER_INSTANCE(ppc64::ppc64_matrixA_reorder_t)) +#endif REG_FAST_DIRECT_COPY(f32, u8)