@@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
7474
7575    if  (CMAKE_OSX_ARCHITECTURES      STREQUAL  "arm64"  OR 
7676        CMAKE_GENERATOR_PLATFORM_LWR STREQUAL  "arm64"  OR 
77-         (NOT  CMAKE_OSX_ARCHITECTURES      AND 
78-         NOT  CMAKE_GENERATOR_PLATFORM_LWR AND 
77+         (NOT  CMAKE_OSX_ARCHITECTURES AND  NOT  CMAKE_GENERATOR_PLATFORM_LWR AND 
7978            CMAKE_SYSTEM_PROCESSOR  MATCHES  "^(aarch64|arm.*|ARM64)$" ))
8079
8180        message (STATUS  "ARM detected" )
8281
83-         if  (MSVC )
84-             list (APPEND  ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead 
85-             list (APPEND  ARCH_DEFINITIONS __ARM_NEON)
86-             list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_FMA)
87- 
88-             set (CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS} )
89-             string (JOIN " "  CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}  "/arch:armv8.2" )
90- 
91-             check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"  GGML_COMPILER_SUPPORT_DOTPROD)
92-             if  (GGML_COMPILER_SUPPORT_DOTPROD)
93-                 list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
94- 
95-                 message (STATUS  "ARM feature DOTPROD enabled" )
96-             endif  ()
97- 
98-             check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }"  GGML_COMPILER_SUPPORT_MATMUL_INT8)
99- 
100-             if  (GGML_COMPILER_SUPPORT_MATMUL_INT8)
101-                 list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
102- 
103-                 message (STATUS  "ARM feature MATMUL_INT8 enabled" )
104-             endif  ()
105- 
106-             check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }"  GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
107-             if  (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
108-                 list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
109- 
110-                 message (STATUS  "ARM feature FP16_VECTOR_ARITHMETIC enabled" )
111-             endif  ()
82+         if  (MSVC  AND  NOT  CMAKE_C_COMPILER_ID STREQUAL  "Clang" )
83+             message (FATAL_ERROR "MSVC is not supported for ARM, use clang" )
84+         else ()
85+             check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
86+             if  (NOT  "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} "  STREQUAL  "" )
87+                 list (APPEND  ARCH_FLAGS -mfp16-format=ieee)
88+             endif ()
11289
113-             set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV} )
114-         elseif  (APPLE )
11590            if  (GGML_NATIVE)
116-                 set (USER_PROVIDED_MARCH FALSE )
117-                 foreach (flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS  CMAKE_REQUIRED_FLAGS)
118-                     if  ("${${flag_var} }"  MATCHES  "-march=[a-zA-Z0-9+._-]+" )
119-                         set (USER_PROVIDED_MARCH TRUE )
120-                         break ()
121-                     endif ()
122-                 endforeach ()
123- 
124-                 if  (NOT  USER_PROVIDED_MARCH)
125-                     set (MARCH_FLAGS "-march=armv8.2a" )
126- 
127-                     check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"  GGML_COMPILER_SUPPORT_DOTPROD)
128-                     if  (GGML_COMPILER_SUPPORT_DOTPROD)
129-                         set (MARCH_FLAGS "${MARCH_FLAGS} +dotprod" )
130-                         list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
91+                 list (APPEND  ARCH_FLAGS -mcpu=native)
13192
132-                         message (STATUS  "ARM feature DOTPROD enabled" )
133-                     endif  ()
93+                 set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
13494
135-                     set (TEST_I8MM_FLAGS "-march=armv8.2a+i8mm" )
95+                 # -mcpu=native does not always enable all the features in some compilers, 
96+                 # so we check for them manually and enable them if available 
13697
137-                     set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
138-                     set (CMAKE_REQUIRED_FLAGS     "${CMAKE_REQUIRED_FLAGS}  ${TEST_I8MM_FLAGS} " )
98+                 include (CheckCXXSourceRuns)
13999
140-                     check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"  GGML_COMPILER_SUPPORT_MATMUL_INT8)
141-                     if  (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142-                         set (MARCH_FLAGS "${MARCH_FLAGS} +i8mm" )
143-                         list (APPEND  ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
100+                 set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS} +dotprod" )
101+                 check_cxx_source_runs(
102+                     "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" 
103+                     GGML_COMPILER_SUPPORT_DOTPROD)
104+                 if  (GGML_COMPILER_SUPPORT_DOTPROD)
105+                     set (ARCH_FLAGS "${ARCH_FLAGS} +dotprod" )
106+                 endif ()
144107
145-                         message (STATUS  "ARM feature MATMUL_INT8 enabled" )
146-                     endif  ()
108+                 set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS} +i8mm" )
109+                 check_cxx_source_runs(
110+                     "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" 
111+                     GGML_COMPILER_SUPPORT_I8MM)
112+                 if  (GGML_COMPILER_SUPPORT_I8MM)
113+                     set (ARCH_FLAGS "${ARCH_FLAGS} +i8mm" )
114+                 endif ()
147115
148-                      set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
116+                 set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
149117
150-                     list (APPEND  ARCH_FLAGS "${MARCH_FLAGS} " )
151-                 endif  ()
152-             endif  ()
153-         else ()
154-             check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155-             if  (NOT  "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} "  STREQUAL  "" )
156-                 list (APPEND  ARCH_FLAGS -mfp16-format=ieee)
157-             endif ()
158-             if  (${CMAKE_SYSTEM_PROCESSOR}  MATCHES  "armv6" )
159-                 # Raspberry Pi 1, Zero 
160-                 list (APPEND  ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161-             endif ()
162-             if  (${CMAKE_SYSTEM_PROCESSOR}  MATCHES  "armv7" )
163-                 if  ("${CMAKE_SYSTEM_NAME} "  STREQUAL  "Android" )
164-                     # Android armeabi-v7a 
165-                     list (APPEND  ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166-                 else ()
167-                     # Raspberry Pi 2 
168-                     list (APPEND  ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
118+             else ()
119+                 if  (GGML_CPU_ARM_ARCH)
120+                     list (APPEND  ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH} )
169121                endif ()
170122            endif ()
171-             if  (${CMAKE_SYSTEM_PROCESSOR}  MATCHES  "armv8" )
172-                 # Android arm64-v8a 
173-                 # Raspberry Pi 3, 4, Zero 2 (32-bit) 
174-                 list (APPEND  ARCH_FLAGS -mno-unaligned-access)
175-             endif ()
176-             if  (GGML_SVE)
177-                 list (APPEND  ARCH_FLAGS -march=armv8.6-a+sve)
123+ 
124+             # show enabled features 
125+             execute_process (
126+                 COMMAND  ${CMAKE_C_COMPILER}  ${ARCH_FLAGS}  -dM -E -
127+                 INPUT_FILE  "/dev/null" 
128+                 OUTPUT_VARIABLE  ARM_FEATURE
129+                 RESULT_VARIABLE  ARM_FEATURE_RESULT
130+             )
131+             if  (ARM_FEATURE_RESULT)
132+                 message (FATAL_ERROR "Failed to get ARM features" )
133+             else ()
134+                 foreach (feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
135+                     string (FIND  "${ARM_FEATURE} "  "__ARM_FEATURE_${feature}  1"  feature_pos)
136+                     if  (NOT  ${feature_pos}  EQUAL  -1)
137+                         message (STATUS  "ARM feature ${feature}  enabled" )
138+                     endif ()
139+                 endforeach ()
178140            endif ()
179141        endif ()
180142    elseif  (CMAKE_OSX_ARCHITECTURES STREQUAL  "x86_64"  OR  CMAKE_GENERATOR_PLATFORM_LWR MATCHES  "^(x86_64|i686|amd64|x64|win32)$"  OR 
181143            (NOT  CMAKE_OSX_ARCHITECTURES AND  NOT  CMAKE_GENERATOR_PLATFORM_LWR AND 
182144            CMAKE_SYSTEM_PROCESSOR  MATCHES  "^(x86_64|i686|AMD64|amd64)$" ))
145+ 
146+         message (STATUS  "x86 detected" )
147+ 
183148        if  (MSVC )
184149            # instruction set detection for MSVC only 
185150            if  (GGML_NATIVE)
0 commit comments