Skip to content

Commit

Permalink
Add amx option in CMakeLists; add amx detection and register header file
Browse files Browse the repository at this point in the history
Signed-off-by: Molly Sophia <[email protected]>
  • Loading branch information
MollySophia committed Aug 25, 2024
1 parent 25a22e0 commit 8916298
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "
endif()
endif()

# NCNN_APPLE_AMX: build the apple amx optimized code paths (default ON).
# The option is only declared here when MACOS is true and
# CMAKE_OSX_ARCHITECTURES contains "arm"; on any other configuration this
# block does not declare it.
# NOTE(review): MACOS is not among CMake's documented platform variables
# (APPLE, IOS, ...) -- presumably it is supplied by the toolchain file; confirm.
if(MACOS)
    if(CMAKE_OSX_ARCHITECTURES MATCHES "arm")
        option(NCNN_APPLE_AMX "optimize apple silicon platforms with apple amx" ON)
    endif()
endif()

if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
Expand Down
3 changes: 3 additions & 0 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ macro(ncnn_add_layer class)
# svef32mm variant: hand ${class} to ncnn_add_arch_opt_source (defined outside
# this view) with the "svef32mm" tag and the -march string as third argument.
if(NCNN_ARM86SVEF32MM)
ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+fp16+dotprod+sve+f32mm")
endif()
# apple amx variant: same helper with the "amx" tag. The third argument is a
# single space rather than a compiler flag.
# NOTE(review): presumably no extra flag is needed because the amx
# instructions are emitted as raw .word values (src/layer/arm/amx_usability.h);
# confirm whether the helper would also accept an empty string here.
if(NCNN_APPLE_AMX)
ncnn_add_arch_opt_source(${class} amx " ")
endif()
endif()
endif()

Expand Down
15 changes: 15 additions & 0 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2378,6 +2378,21 @@ int cpu_support_arm_svef32mm()
#endif
}

// Report whether the current cpu belongs to an apple silicon family that is
// treated as amx-capable.
// returns 1 for the cpu families listed below on apple aarch64 builds,
// 0 for every other family and on every other platform
int cpu_support_arm_amx()
{
    // presumably fills in g_hw_cpufamily before it is read below -- defined elsewhere in this file
    try_initialize_global_cpu_info();

#if __aarch64__ && __APPLE__
    // a single matching family is enough
    if (g_hw_cpufamily == CPUFAMILY_ARM_FIRESTORM_ICESTORM)
        return 1;
    if (g_hw_cpufamily == CPUFAMILY_ARM_AVALANCHE_BLIZZARD)
        return 1;
    if (g_hw_cpufamily == CPUFAMILY_ARM_IBIZA)
        return 1;
    if (g_hw_cpufamily == CPUFAMILY_ARM_LOBOS)
        return 1;
    if (g_hw_cpufamily == CPUFAMILY_ARM_PALMA)
        return 1;
#endif

    // unknown family, or not an apple aarch64 build
    return 0;
}

int cpu_support_x86_avx()
{
try_initialize_global_cpu_info();
Expand Down
2 changes: 2 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ NCNN_EXPORT int cpu_support_arm_svebf16();
NCNN_EXPORT int cpu_support_arm_svei8mm();
// svef32mm = aarch64 svef32mm
NCNN_EXPORT int cpu_support_arm_svef32mm();
// amx = aarch64 apple amx
// returns 1 when the detected apple cpu family is one of the families the
// implementation lists as amx-capable, 0 otherwise (always 0 unless built for apple aarch64)
NCNN_EXPORT int cpu_support_arm_amx();

// avx = x86 avx
NCNN_EXPORT int cpu_support_x86_avx();
Expand Down
50 changes: 50 additions & 0 deletions src/layer/arm/amx_usability.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef AMX_USABILITY_H
#define AMX_USABILITY_H

// Raw instruction encoders for apple amx.
//
// Every macro below expands to an inline-asm statement that emits one
// hand-encoded 32-bit instruction word via ".word", so no assembler or
// compiler support for the instructions is required. The macros only make
// sense when expanded in code built for apple aarch64; merely including this
// header is harmless elsewhere because nothing is emitted until a macro is used.

// AMX_OP_GPR casts its operand to uint64_t, so the header must bring the type
// in itself instead of relying on the includer.
#include <stdint.h>

// From https://github.com/corsix/amx/blob/main/aarch64.h

// Emit three nops followed by the instruction word
//   0x201000 + (op << 5) + imm5
// where both op and imm5 must be compile-time integer constants ("i" constraint).
// The "memory" clobber keeps the compiler from moving memory accesses across it.
#define AMX_NOP_OP_IMM5(op, imm5) \
    __asm("nop\nnop\nnop\n.word (0x201000 + (%0 << 5) + %1)" : : "i"(op), "i"(imm5) : "memory")

// Emit the instruction word
//   0x201000 + (op << 5) + <register number holding gpr>
// op must be a compile-time integer constant; gpr is any value convertible to
// uint64_t and is placed in a general-purpose register by the compiler.
//
// The register number is recovered inside the assembler expression: the
// operand is printed by register name (e.g. x9, x17), so "0%1" reads as the
// hex literal 0x9 or 0x17. Subtracting ((value >> 4) * 6) converts that hex
// reading back to the decimal register number (0x17 = 23, 23 - 1 * 6 = 17).
// The meaning of the 64-bit operand itself is defined by the referenced
// project and is not described here.
#define AMX_OP_GPR(op, gpr) \
    __asm(".word (0x201000 + (%0 << 5) + 0%1 - ((0%1 >> 4) * 6))" : : "i"(op), "r"((uint64_t)(gpr)) : "memory")

// Named wrappers, one per opcode number (0..22). Names follow the referenced
// project; all take a 64-bit operand except AMX_SET / AMX_CLR.
#define AMX_LDX(gpr)    AMX_OP_GPR( 0, gpr)
#define AMX_LDY(gpr)    AMX_OP_GPR( 1, gpr)
#define AMX_STX(gpr)    AMX_OP_GPR( 2, gpr)
#define AMX_STY(gpr)    AMX_OP_GPR( 3, gpr)
#define AMX_LDZ(gpr)    AMX_OP_GPR( 4, gpr)
#define AMX_STZ(gpr)    AMX_OP_GPR( 5, gpr)
#define AMX_LDZI(gpr)   AMX_OP_GPR( 6, gpr)
#define AMX_STZI(gpr)   AMX_OP_GPR( 7, gpr)
#define AMX_EXTRX(gpr)  AMX_OP_GPR( 8, gpr)
#define AMX_EXTRY(gpr)  AMX_OP_GPR( 9, gpr)
#define AMX_FMA64(gpr)  AMX_OP_GPR(10, gpr)
#define AMX_FMS64(gpr)  AMX_OP_GPR(11, gpr)
#define AMX_FMA32(gpr)  AMX_OP_GPR(12, gpr)
#define AMX_FMS32(gpr)  AMX_OP_GPR(13, gpr)
#define AMX_MAC16(gpr)  AMX_OP_GPR(14, gpr)
#define AMX_FMA16(gpr)  AMX_OP_GPR(15, gpr)
#define AMX_FMS16(gpr)  AMX_OP_GPR(16, gpr)
// opcode 17 takes an immediate instead of a register: 0 = set, 1 = clr
// NOTE(review): presumably these enable / disable amx use for the calling thread -- confirm against the referenced project
#define AMX_SET()       AMX_NOP_OP_IMM5(17, 0)
#define AMX_CLR()       AMX_NOP_OP_IMM5(17, 1)
#define AMX_VECINT(gpr) AMX_OP_GPR(18, gpr)
#define AMX_VECFP(gpr)  AMX_OP_GPR(19, gpr)
#define AMX_MATINT(gpr) AMX_OP_GPR(20, gpr)
#define AMX_MATFP(gpr)  AMX_OP_GPR(21, gpr)
#define AMX_GENLUT(gpr) AMX_OP_GPR(22, gpr)

#endif // AMX_USABILITY_H

0 comments on commit 8916298

Please sign in to comment.