Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
b8d540d
CLNTFRAME-376: Add initial pipeline setup
JonathanC-ARM Jun 5, 2025
d69d3f5
Integrate initial KFI changes
JonathanC-ARM Jun 6, 2025
9e56664
updated build and test to have mac stages
JonathanC-ARM Jun 11, 2025
5c03bcd
Sync with latest from old repo
dflavin-arm Jun 12, 2025
48b09e3
Added Dynamic-Quantized Matmuls and GEMV
damdoo01-arm Jun 16, 2025
972eef5
Fixed copyright attribution
damdoo01-arm Jun 17, 2025
baa63df
KFI-51 Requires target "kleidiai" error building ONNX RT on aarch64.
Colm-in-Arm Jun 23, 2025
a4068c1
MLAS API updates, mlas test fixes and ORT test fixes
damdoo01-arm Jul 4, 2025
3b34766
Remove Arm CI internal directory inadvertently pushed previously
damdoo01-arm Jul 5, 2025
44199a5
Fix to iOS build
damdoo01-arm Jul 7, 2025
459acf8
2nd attempt to fix ios build by force disabling KAI
damdoo01-arm Jul 7, 2025
c675ccd
Wrap preprocessor ifdefs around dedicated KAI lib
damdoo01-arm Jul 7, 2025
8b8e6a0
Lint fixes
damdoo01-arm Jul 8, 2025
91008e9
Added Android/Linux CI build fixes plus fixed a layer parser fix
damdoo01-arm Jul 8, 2025
f984e81
Fix to 2 more CI failures. 1. kleidiai dir not visible in some builds…
damdoo01-arm Jul 9, 2025
25e9815
Remove badly named directory
damdoo01-arm Jul 9, 2025
3b9fd9a
Renamed kleidiai dir in lower case
damdoo01-arm Jul 9, 2025
fa558b3
Merge branch 'main' into kai_sgemm_igemm_quant_gemv
damdoo01-arm Jul 9, 2025
db144a0
QGemm call fixes that resolve the failing tests in CPU_U8S8_Precision…
Jul 9, 2025
38083e6
RESTRICT define created to overcome syntactical differences between c…
damdoo01-arm Jul 9, 2025
8285fcf
Resolve conflicts in mlasi.h
damdoo01-arm Jul 9, 2025
967e337
Added support to restrict keyword for msvc in KAI convolve function
damdoo01-arm Jul 9, 2025
ff5c2ea
Attempt to fix MSVC build by correctly linking kleidiai static library
damdoo01-arm Jul 10, 2025
8e6f4ea
Syntax fix on prev
damdoo01-arm Jul 10, 2025
d38d63d
Reverting back to where we were before MSVC work
damdoo01-arm Jul 15, 2025
dead330
Staged for MSVC changes (now Linux/MacOS compatible)
damdoo01-arm Jul 15, 2025
13a1c2b
Workaround pending KleidiAI release
damdoo01-arm Jul 16, 2025
8471aa4
Merge branch 'microsoft:main' into kai_sgemm_igemm_quant_gemv
damdoo01-arm Jul 16, 2025
6da3217
Merge branch 'microsoft:main' into kai_sgemm_igemm_quant_gemv
damdoo01-arm Jul 16, 2025
34ec660
Reverted to KleidiAI 1.9 removing SME support for MSVC. Fixed duktape…
damdoo01-arm Jul 16, 2025
be0b6bf
KleidiAI build in Android
damdoo01-arm Jul 21, 2025
b82f64b
Checkpointing lint fixes after android enablement and addressing comm…
damdoo01-arm Jul 21, 2025
810ee40
Added MSVC 1944 flag and fixed syntax issue on quantize matmul c++ ca…
damdoo01-arm Jul 22, 2025
6ef54d9
Reverted suggestion for 1944 gate, not possible without KAI release
damdoo01-arm Jul 22, 2025
1169ccc
Most of the responses to comments (small number outstanding)
damdoo01-arm Jul 22, 2025
db2feff
Unit test updates and lint fixes (more to follow)
damdoo01-arm Jul 23, 2025
1aa31f9
DynQuantMatMul enhancements
damdoo01-arm Jul 23, 2025
62175e8
Corrected typo
damdoo01-arm Jul 23, 2025
8e277e3
Added further comments
damdoo01-arm Jul 23, 2025
72dd877
Lint fix
damdoo01-arm Jul 23, 2025
795ed28
Temporary commit to check changes
damdoo01-arm Jul 23, 2025
baa2eb1
Refactor overrides for kleidi func calls and included fix for matmul …
damdoo01-arm Jul 23, 2025
9c7360e
Merge branch 'microsoft:main' into kai_sgemm_igemm_quant_gemv
damdoo01-arm Jul 24, 2025
00fda4a
Reinstated SME checks where still required
damdoo01-arm Jul 24, 2025
4379912
Added Dynamic Quantization Unit Test
damdoo01-arm Jul 24, 2025
74e8bf1
Fixed lint issues
damdoo01-arm Jul 24, 2025
e9c7291
Further updates to comments, dyn quant matmul changes only pending
damdoo01-arm Jul 24, 2025
b201e5e
Dynamic Quant Matmul changes based on feed back
damdoo01-arm Jul 24, 2025
dba3f0b
Tidy-up based on MS feedback
damdoo01-arm Jul 24, 2025
52961ea
Lint fixes
damdoo01-arm Jul 24, 2025
8523708
Added Jonathan's guard conditions for DynQuantMatMul
damdoo01-arm Jul 24, 2025
d45c6bb
Fixed unused variable error after guard include
damdoo01-arm Jul 24, 2025
4753512
Removed global variable and fixed transA override (removed inadverten…
damdoo01-arm Jul 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
option(onnxruntime_USE_KLEIDIAI "Build with KleidiAI integration in MLAS" OFF)
# The iOS simulator build explicitly enables USE_KLEIDIAI, so force the option back to OFF
# whenever an Apple build lists x86_64 in CMAKE_OSX_ARCHITECTURES. MATCHES is a regex
# search, so a multi-architecture value that contains x86_64 is overridden as well.
# The option's help text is repeated here because a FORCE write replaces the cached
# docstring; passing "" would blank it in cmake-gui/ccmake.
if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
  message(WARNING "Disabling KleidiAI: not supported on Apple x86_64 platforms")
  set(onnxruntime_USE_KLEIDIAI OFF CACHE BOOL "Build with KleidiAI integration in MLAS" FORCE)
endif()
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
Expand Down Expand Up @@ -275,8 +280,6 @@ if (onnxruntime_ENABLE_TRAINING_APIS)
endif()




# Single output directory for all binaries
set(RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin CACHE PATH "Single output directory for all binaries.")

Expand Down Expand Up @@ -648,17 +651,25 @@ else()
endif()
endif()

if (onnxruntime_USE_KLEIDIAI AND NOT MSVC AND (
(onnxruntime_target_platform STREQUAL "aarch64") OR
(onnxruntime_target_platform STREQUAL "ARM64") OR
(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")))
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
if (NOT HAS_ARM64_DOTPROD)
message(FATAL_ERROR "The compiler doesn't support dotprod")
endif()
if (NOT HAS_ARM64_I8MM)
message(FATAL_ERROR "The compiler doesn't support i8mm")
# When KleidiAI is requested for an Arm 64-bit target (platform "aarch64"/"ARM64", or
# Apple with an arm64 system processor), make sure HAS_ARM64_DOTPROD and HAS_ARM64_I8MM
# are established before the rest of the build relies on them.
if (onnxruntime_USE_KLEIDIAI AND (
(onnxruntime_target_platform STREQUAL "aarch64") OR
(onnxruntime_target_platform STREQUAL "ARM64") OR
(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")))

# TODO: Add real dotprod/i8mm capability checks for MSVC compilation.
if(NOT MSVC)
# Probe the compiler with the -march= flag for each extension.
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
# A missing extension aborts configuration instead of silently building without it.
if (NOT HAS_ARM64_DOTPROD)
message(FATAL_ERROR "The compiler doesn't support dotprod")
endif()
if (NOT HAS_ARM64_I8MM)
message(FATAL_ERROR "The compiler doesn't support i8mm")
endif()
else()
# No probe runs in the MSVC branch: both results are set to TRUE unconditionally.
message(STATUS "Skipping -march= checks on MSVC (not supported), assuming dotprod/i8mm support manually.")
set(HAS_ARM64_DOTPROD TRUE)
set(HAS_ARM64_I8MM TRUE)
endif()
endif()

Expand Down Expand Up @@ -1008,6 +1019,10 @@ function(onnxruntime_set_compile_flags target_name)
if (onnxruntime_ENABLE_ATEN)
target_compile_definitions(${target_name} PRIVATE ENABLE_ATEN)
endif()
# TODO: Narrow the scope of the USE_KLEIDIAI compile definition; it is currently added to every target passed to this function
if (onnxruntime_USE_KLEIDIAI)
target_compile_definitions(${target_name} PRIVATE USE_KLEIDIAI)
endif()

set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON)
if (onnxruntime_USE_CUDA)
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
dawn;https://github.com/google/dawn/archive/9733be39e18186961d503e064874afe3e9ceb8d1.zip;2a4017c32892b90d072a9102eba90ae691fae36d
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.4.0.tar.gz;22d3b57b54a61c194ab256ff11b0353a3b220244
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.9.0.tar.gz;a2765979f64efb173a4b8ba4de39dcba9c655786
duktape;https://github.com/svaarala/duktape/releases/download/v2.7.0/duktape-2.7.0.tar.xz;8200c8e417dbab7adcc12c4dbdef7651cfc55794
11 changes: 11 additions & 0 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

message(STATUS "Loading Dependencies URLs ...")

include(external/helper_functions.cmake)
Expand Down Expand Up @@ -819,6 +822,14 @@ if(onnxruntime_USE_COREML)

endif()

# Fetch KleidiAI as an external dependency when the KleidiAI integration is enabled.
if(onnxruntime_USE_KLEIDIAI)
# Disable the KleidiAI tests
set(KLEIDIAI_BUILD_TESTS OFF)

# Declare the download from DEP_URL_kleidiai with a SHA1 check against DEP_SHA1_kleidiai
# (presumably both come from the kleidiai entry in cmake/deps.txt - confirm), then make it available.
onnxruntime_fetchcontent_declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai} EXCLUDE_FROM_ALL)
onnxruntime_fetchcontent_makeavailable(kleidiai)
endif()

set(onnxruntime_LINK_DIRS)
if (onnxruntime_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
Expand Down
24 changes: 11 additions & 13 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -267,24 +267,23 @@ function(setup_mlas_source_for_windows)
endfunction()

# setup_kleidiai()
# Wires KleidiAI into the onnxruntime_mlas target: adds the KleidiAI-backed MLAS sources,
# links the kleidiai target, appends it to onnxruntime_EXTERNAL_LIBRARIES in the caller's
# scope, and installs it when onnxruntime is not built as a shared library.
# Takes no arguments.
# NOTE(review): this listing appears to interleave removed and added diff lines (no +/-
# markers survive), so some statements below may no longer exist in the merged file -
# confirm against the repository.
function(setup_kleidiai)
target_compile_definitions(onnxruntime_mlas PRIVATE USE_KLEIDIAI)

# Disable the KleidiAI tests
set(KLEIDIAI_BUILD_TESTS OFF)

# Fetch KleidiAI sources:
# Only the declare call is guarded by the target check; makeavailable is always invoked.
if (NOT TARGET kleidiai)
onnxruntime_fetchcontent_declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai} EXCLUDE_FROM_ALL)
endif()
onnxruntime_fetchcontent_makeavailable(kleidiai)

# KleidiAI-specific MLAS sources, compiled as part of onnxruntime_mlas.
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/kai_ukernel_interface.cpp
${MLAS_SRC_DIR}/kleidiai/sgemm_kleidiai.cpp
${MLAS_SRC_DIR}/kleidiai/convolve_kleidiai.cpp
${MLAS_SRC_DIR}/kleidiai/qgemm_kleidiai.cpp
)
target_link_libraries(onnxruntime_mlas PRIVATE kleidiai)

# list(APPEND) only changes this function's local copy; PARENT_SCOPE writes the
# updated list back into the caller's scope.
list(APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai)
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES} PARENT_SCOPE)

# When onnxruntime is not built as a shared library, install the kleidiai target and
# add it to the ${PROJECT_NAME}Targets export set.
if (NOT onnxruntime_BUILD_SHARED_LIB)
install(TARGETS kleidiai EXPORT ${PROJECT_NAME}Targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
endfunction()

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
Expand All @@ -311,7 +310,6 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
elseif(MSVC)
setup_mlas_source_for_windows()
else()

if(APPLE)
get_target_property(ONNXRUNTIME_MLAS_OSX_ARCH onnxruntime_mlas OSX_ARCHITECTURES)

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/contrib_ops/cpu/bert/attention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ bool Attention<T>::IsPackWeightsSuccessful(int qkv_index,
const T* weights_data,
size_t weight_matrix_col_size,
/*out*/ PrePackedWeights* prepacked_weights) {
size_t packb_size = MlasGemmPackBSize(head_size, input_hidden_size);
size_t packb_size = MlasGemmPackBSize(CblasNoTrans, CblasNoTrans, head_size, input_hidden_size);
if (packb_size == 0) {
return false;
}
Expand All @@ -87,7 +87,7 @@ bool Attention<T>::IsPackWeightsSuccessful(int qkv_index,
memset(packed_weights_data, 0, packed_weights_data_size);

for (size_t i = 0; i < loop_len; i++) {
MlasGemmPackB(CblasNoTrans, head_size, input_hidden_size, weights_data, weight_matrix_col_size, packed_weights_data);
MlasGemmPackB(CblasNoTrans, CblasNoTrans, head_size, input_hidden_size, weights_data, weight_matrix_col_size, packed_weights_data);
packed_weights_data += packb_size;
weights_data += head_size;
}
Expand Down
Loading
Loading