diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 87143e68c6dacf..f54f9f76edee0d 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -33,6 +33,43 @@ ov::element::Type get_arithmetic_binary_exec_precision(const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { +} + +jit_abs_emitter::jit_abs_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { +} + +size_t jit_abs_emitter::get_inputs_count() const { return 1; } + +void jit_abs_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_abs_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + TReg src = TReg(in_vec_idxs[0]); + TReg dst = TReg(out_vec_idxs[0]); + + h->fabs(dst.s, src.s); +} + +std::set> jit_abs_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + /// ADD /// jit_add_emitter::jit_add_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 1f89d7864d1003..59bc8214113357 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -10,6 +10,27 @@ namespace ov { namespace intel_cpu { namespace aarch64 { +class jit_abs_emitter : public jit_emitter { +public: + jit_abs_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_abs_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_add_emitter : public jit_emitter { public: jit_add_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index d7c54ff1c141b5..f3a3e09ea67f76 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -18,6 +18,7 @@ bool JitEltwiseExecutor::isSupported( const float beta, const float gamma) { const auto is_supported = one_of(algorithm, + Algorithm::EltwiseAbs, Algorithm::EltwiseAdd, Algorithm::EltwiseClamp, Algorithm::EltwiseDivide, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index d1c3798dacf4d7..ef7349ed1aeac3 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -451,13 +451,13 @@ void jit_uni_eltwise_generic::store_vector(const XReg& ptr, break; } case ov::element::i8: { - fcvtns(data.s, data.s); + fcvtms(data.s, data.s); xtn(data.h4, data.s4); xtn(data.b8, data.h8); break; } case ov::element::u8: { - fcvtnu(data.s, data.s); + fcvtmu(data.s, data.s); xtn(data.h4, data.s4); xtn(data.b8, data.h8); break; @@ -515,14 +515,14 @@ void jit_uni_eltwise_generic::store_scalar(const XReg& ptr, } case ov::element::i8: { TReg vec_data(data.getIdx()); - fcvtns(vec_data.s, vec_data.s); + fcvtms(vec_data.s, vec_data.s); xtn(vec_data.h4, vec_data.s4); xtn(vec_data.b8, vec_data.h8); break; } case ov::element::u8: { TReg vec_data(data.getIdx()); - fcvtnu(vec_data.s, vec_data.s); + fcvtmu(vec_data.s, vec_data.s); xtn(vec_data.h4, vec_data.s4); xtn(vec_data.b8, vec_data.h8); break; @@ -609,6 +609,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte }; OV_SWITCH(intel_cpu, EltwiseEmitter, ctx, data.algo, + OV_CASE(Algorithm::EltwiseAbs, ov::intel_cpu::aarch64::jit_abs_emitter), OV_CASE(Algorithm::EltwiseAdd, ov::intel_cpu::aarch64::jit_add_emitter), OV_CASE(Algorithm::EltwiseClamp, ov::intel_cpu::aarch64::jit_clamp_emitter), OV_CASE(Algorithm::EltwiseDivide, ov::intel_cpu::aarch64::jit_divide_emitter), @@ -767,6 +768,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_SWITCH(intel_cpu, SupportedPrecisions, precisions, algo, OV_CASE(Algorithm::EltwiseRelu, jit_relu_emitter), + OV_CASE(Algorithm::EltwiseAbs, jit_abs_emitter), OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), OV_CASE(Algorithm::EltwiseClamp, jit_clamp_emitter), OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter),