Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions src/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,23 @@ if((DNNL_TARGET_ARCH STREQUAL "X64") OR (DNNL_TARGET_ARCH STREQUAL "AARCH64"))
endif()

if(DNNL_TARGET_ARCH STREQUAL "PPC64")
file(GLOB FILES_REQUIRED_OPT
${CMAKE_CURRENT_SOURCE_DIR}/gemm/*.[ch]pp
)
if(NOT UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
set_source_files_properties(${FILES_REQUIRED_OPT}
PROPERTIES COMPILE_FLAGS "-O3 -funroll-loops")
include(CheckSourceCompiles)

check_source_compiles(C [[
#if !defined(__MMA__)
# error "MMA not available"
#endif
int main(void) { return 0; }
]] DNNL_PPC64_HAS_MMA)
if(DNNL_PPC64_HAS_MMA)
add_definitions_with_host_compiler(-DDNNL_PPC64_HAS_MMA=1)
file(GLOB FILES_REQUIRED_OPT
${CMAKE_CURRENT_SOURCE_DIR}/gemm/*.[ch]pp
)
if(NOT UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
set_source_files_properties(${FILES_REQUIRED_OPT}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these flags now applied only on MMA systems?

PROPERTIES COMPILE_FLAGS "-O3 -funroll-loops")
endif()
endif()
endif()

Expand Down Expand Up @@ -139,7 +150,9 @@ if (DNNL_TARGET_ARCH STREQUAL "AARCH64")
add_subdirectory(aarch64)
endif()
if (DNNL_TARGET_ARCH STREQUAL "PPC64")
add_subdirectory(ppc64)
if(DNNL_PPC64_HAS_MMA)
add_subdirectory(ppc64)
endif()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excluding the whole directory here does not seem quite correct.
The CMake file that should be changed is src/cpu/ppc64/CMakeLists.txt. It should include the extra files only if DNNL_PPC64_HAS_MMA is defined, under the assumption that PPC support may consist of more than the intrinsic-based code, or of more than just the latest version of intrinsic support (like v8 versus v10).

endif()
if (DNNL_TARGET_ARCH STREQUAL "S390X")
add_subdirectory(s390x)
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ const impl_list_map_t &regular_f32_u8_impl_list_map() {
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t))
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))

#ifdef DNNL_PPC64_HAS_MMA
DNNL_PPC64_ONLY(CPU_REORDER_INSTANCE(ppc64::ppc64_matrixA_reorder_t))
Copy link
Copy Markdown
Contributor

@dzarukin dzarukin Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if DNNL_PPC64_ONLY can be re-qualified to include DNNL_PPC64_HAS_MMA...
If not, a new DNNL_PPC64_MMA_ONLY might be a better option.

Edit: it seems a new version of the macro would be needed anyway, coupled with the build-time changes related to DNNL_PPC64_HAS_MMA.

#endif

REG_FAST_DIRECT_COPY(f32, u8)

Expand Down
Loading