Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/cpu/cpu_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
// Architecture-gated wrappers around CPU_INSTANCE: each one forwards its
// arguments to CPU_INSTANCE and passes the result through the matching
// DNNL_<ARCH>_ONLY macro.
// NOTE(review): the *_ONLY helpers are defined elsewhere; presumably they drop
// the entry on non-matching targets -- confirm against their definitions.
#define CPU_INSTANCE_AARCH64_ACL(...) \
DNNL_AARCH64_ACL_ONLY(CPU_INSTANCE(__VA_ARGS__))
#define CPU_INSTANCE_RV64GCV(...) DNNL_RV64GCV_ONLY(CPU_INSTANCE(__VA_ARGS__))
#define CPU_INSTANCE_PPC64(...) DNNL_PPC64_ONLY(CPU_INSTANCE(__VA_ARGS__))

namespace dnnl {
namespace impl {
Expand Down
24 changes: 7 additions & 17 deletions src/cpu/gemm/gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

using namespace dnnl::impl::cpu::x64;
#elif DNNL_PPC64
#include "cpu/ppc64/ppc64_gemm_driver.hpp"
#include "cpu/ppc64/gemm/gemm_driver.hpp"
using namespace dnnl::impl::cpu::ppc64;
#elif DNNL_S390X
#include "cpu/s390x/gemm.h"
Expand Down Expand Up @@ -209,11 +209,10 @@ dnnl_status_t gemm_s8u8s32(const char *transa, const char *transb,
}
#elif DNNL_PPC64
#ifdef __MMA__
int ATflag = (*transa == 'T') || (*transa == 't');
int BTflag = (*transb == 'T') || (*transb == 't');

return cblas_gemm_s8x8s32_ppc64(ATflag, BTflag, offsetc, *M, *N, *K, *alpha,
A, *LDA, ao, B, *LDB, bo, C, *beta, *LDC, co, 0);
status = gemm_driver(transa, transb, offsetc, M, N, K, alpha, A, LDA, ao, B,
LDB, bo, beta, C, LDC, co, false);
if (status != status::unimplemented) return status;
#endif
#elif DNNL_S390X
#if defined(__VX__)
Expand Down Expand Up @@ -259,18 +258,9 @@ dnnl_status_t gemm_s8s8s32(const char *transa, const char *transb,

#if DNNL_PPC64
#ifdef __MMA__
int ATflag = (*transa == 'T') || (*transa == 't');
int BTflag = (*transb == 'T') || (*transb == 't');

// Note please that the coercion of "B" and "bo" from int8_t to uint8_t is
// accompanied by the last parameter being set to "1" instead of "0", as
// in the analogous call in the previous routine above.
// This last parameter flags the fact of the coercion, so the called routine
// can process "B" and "bo" appropriately.

return cblas_gemm_s8x8s32_ppc64(ATflag, BTflag, offsetc, *M, *N, *K, *alpha,
A, *LDA, ao, (const uint8_t *)B, *LDB, (const uint8_t *)bo, C,
*beta, *LDC, co, 1);
status = gemm_driver(transa, transb, offsetc, M, N, K, alpha, A, LDA, ao, B,
LDB, bo, beta, C, LDC, co, false);
if (status != status::unimplemented) return status;
#endif
#elif DNNL_S390X
#if defined(__VX__)
Expand Down
4 changes: 3 additions & 1 deletion src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
const gemm_based::params_t &params = pd()->params();
const bool use_single_gemm_call = pd()->has_runtime_dims_or_strides()
? helper.use_single_gemm_call_optimization(po)
: params.use_single_gemm_call_optimization_;
: ((platform::is_ppc64() && ndims == 2)
|| params.use_single_gemm_call_optimization_);
bool dst_is_acc = params.dst_is_acc_;
int32_t *acc = dst_is_acc
? reinterpret_cast<int32_t *>(dst)
Expand All @@ -297,6 +298,7 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
== (1 << (ndims - 1));

std::atomic<status_t> st(status::success);

if (!use_single_gemm_call) {
const int src_mask
= utils::get_dims_mask(dst_d.dims(), src_d.dims(), ndims);
Expand Down
11 changes: 10 additions & 1 deletion src/cpu/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
// Helper macros: expand the parameters only on the corresponding architecture.
// Equivalent to: #if DNNL_$ARCH ... #endif
#define DNNL_X64_ONLY(...) Z_CONDITIONAL_DO(DNNL_X64, __VA_ARGS__)
#define DNNL_PPC64_ONLY(...) Z_CONDITIONAL_DO(DNNL_PPC64_ONLY, __VA_ARGS__)
#define DNNL_PPC64_ONLY(...) Z_CONDITIONAL_DO(DNNL_PPC64, __VA_ARGS__)
// The condition handed to Z_CONDITIONAL_DO must be the DNNL_S390X architecture
// flag, not this macro's own name: a function-like macro name used without
// parentheses is never expanded, so the self-reference cannot evaluate to 0/1.
#define DNNL_S390X_ONLY(...) Z_CONDITIONAL_DO(DNNL_S390X, __VA_ARGS__)
#define DNNL_AARCH64_ONLY(...) Z_CONDITIONAL_DO(DNNL_AARCH64, __VA_ARGS__)

Expand Down Expand Up @@ -182,6 +182,15 @@ constexpr int get_cache_line_size() {

int get_vector_register_size();

// Compile-time query for the PPC64 build target, so callers can use an
// ordinary branch instead of wrapping code in #if DNNL_PPC64.
// Returns true only when DNNL_PPC64 evaluates to non-zero in the preprocessor.
static constexpr bool is_ppc64() {
#if !DNNL_PPC64
    return false;
#else
    return true;
#endif
}

size_t get_timestamp();

} // namespace platform
Expand Down
Loading