133 changes: 0 additions & 133 deletions .github/workflows/sca.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .gitmodules
@@ -8,6 +8,3 @@
path = cmake/external/emsdk
url = https://github.com/emscripten-core/emsdk.git
branch = 3.1.44
[submodule "cmake/external/onnxruntime-extensions"]
path = cmake/external/onnxruntime-extensions
url = https://github.com/microsoft/onnxruntime-extensions.git
34 changes: 34 additions & 0 deletions ThirdPartyNotices.txt
@@ -6230,3 +6230,37 @@ https://github.com/intel/neural-compressor
terms, and open source software license terms. These separate license terms
govern your use of the third party programs as set forth in the
"THIRD-PARTY-PROGRAMS" file.

_____

FlashAttention, https://github.com/Dao-AILab/flash-attention

BSD 3-Clause License

Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.16.0
1.16.2
11 changes: 10 additions & 1 deletion cmake/CMakeLists.txt
@@ -84,7 +84,8 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)

option(onnxruntime_USE_FLASH_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32; onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)

option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
option(onnxruntime_USE_AVX "Use AVX instructions" OFF)
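
For context on the new cmake_dependent_option call above: unlike a plain option(), a dependent option keeps its user-settable default only while every entry in its dependency list evaluates true, and is otherwise forced to the trailing fallback value. A minimal, standalone sketch of that behaviour (hypothetical DEMO_* names, not part of this change):

# Sketch only: DEMO_* names are illustrative stand-ins.
cmake_minimum_required(VERSION 3.22)
project(dependent_option_demo NONE)

include(CMakeDependentOption)

option(DEMO_USE_CUDA "Stand-in for the CUDA switch" OFF)

# ON by default, but only on non-Windows hosts with DEMO_USE_CUDA enabled;
# in any other configuration the value is pinned to the fallback OFF and the
# option is hidden from the cache editor.
cmake_dependent_option(DEMO_USE_FLASH_ATTENTION
    "Build flash attention kernel" ON
    "NOT WIN32; DEMO_USE_CUDA" OFF)

message(STATUS "DEMO_USE_FLASH_ATTENTION = ${DEMO_USE_FLASH_ATTENTION}")

Read against the diff, onnxruntime_USE_FLASH_ATTENTION can therefore no longer be switched on for Windows or non-CUDA builds, while the new onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION remains an ordinary ON-by-default option().
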
@@ -666,13 +667,16 @@ if (onnxruntime_USE_CUDA)

if (onnxruntime_DISABLE_CONTRIB_OPS)
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
message( STATUS "Turn off flash attention since CUDA compiler version < 11.6")
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
else()
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

if (onnxruntime_USE_CUDA)
@@ -685,6 +689,11 @@ if (onnxruntime_USE_CUDA)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_FLASH_ATTENTION=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_FLASH_ATTENTION=1)
endif()
if (onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
message( STATUS "Enable memory efficient attention for CUDA EP")
list(APPEND ORT_PROVIDER_FLAGS -DUSE_MEMORY_EFFICIENT_ATTENTION=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=1)
endif()

endif()
if (onnxruntime_USE_VITISAI)
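
Taken together, the hunks above follow one pattern: each attention option is force-disabled when its prerequisites are missing (contrib ops disabled, CUDA compiler older than 11.6, or no CUDA EP at all), and whatever survives is appended to the provider flag lists as a -DUSE_*=1 definition that the CUDA sources can test with the matching preprocessor guard. A compressed sketch of that pattern, again with hypothetical DEMO_* names rather than the real onnxruntime variables and targets:

# Sketch only: DEMO_* names are illustrative stand-ins.
cmake_minimum_required(VERSION 3.22)
project(attention_flags_demo NONE)

option(DEMO_USE_CUDA "Stand-in for the CUDA EP switch" OFF)
option(DEMO_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel" ON)

if (NOT DEMO_USE_CUDA)
  # Without the CUDA provider the kernel cannot be built, so the option is
  # overridden regardless of what the user asked for.
  set(DEMO_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

set(DEMO_PROVIDER_FLAGS "")
if (DEMO_USE_MEMORY_EFFICIENT_ATTENTION)
  message(STATUS "Enable memory efficient attention for CUDA EP")
  # Ends up as a preprocessor definition, so C++/CUDA sources can wrap the
  # kernel with `#if USE_MEMORY_EFFICIENT_ATTENTION`.
  list(APPEND DEMO_PROVIDER_FLAGS -DUSE_MEMORY_EFFICIENT_ATTENTION=1)
endif()

if (DEMO_PROVIDER_FLAGS)
  add_compile_options(${DEMO_PROVIDER_FLAGS})
endif()
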
2 changes: 1 addition & 1 deletion cmake/external/cutlass.cmake
@@ -1,4 +1,4 @@
if (onnxruntime_USE_FLASH_ATTENTION)
if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
include(FetchContent)
FetchContent_Declare(
cutlass
3 changes: 1 addition & 2 deletions cmake/external/onnxruntime_external_deps.cmake
@@ -46,8 +46,8 @@ if (onnxruntime_BUILD_UNIT_TESTS)
FetchContent_Declare(
googletest
URL ${DEP_URL_googletest}
FIND_PACKAGE_ARGS 1.13.0...<2.0.0 NAMES GTest
URL_HASH SHA1=${DEP_SHA1_googletest}
OVERRIDE_FIND_PACKAGE
)
endif()

@@ -528,4 +528,3 @@ endif()

FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
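
The googletest hunk swaps between the two ways FetchContent can cooperate with find_package (both keywords need CMake 3.24 or newer). A sketch of the difference; the URL below is an ordinary googletest release archive rather than the pinned ${DEP_URL_googletest} used by the real build:

# Sketch of the two FetchContent/find_package integration modes touched above.
cmake_minimum_required(VERSION 3.24)
project(fetchcontent_demo LANGUAGES CXX)

include(FetchContent)

# FIND_PACKAGE_ARGS: FetchContent_MakeAvailable() first tries
# find_package(googletest 1.13.0...<2.0.0 NAMES GTest) and only downloads the
# archive when no suitable installed GTest config package is found.
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip
  FIND_PACKAGE_ARGS 1.13.0...<2.0.0 NAMES GTest
)

# OVERRIDE_FIND_PACKAGE is the opposite trade-off and cannot be combined with
# FIND_PACKAGE_ARGS: the fetched sources always win, and any later
# find_package(googletest) call is redirected to the fetched copy instead of a
# system install.
#
#   FetchContent_Declare(
#     googletest
#     URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip
#     OVERRIDE_FIND_PACKAGE
#   )

FetchContent_MakeAvailable(googletest)
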

2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers.cmake
@@ -529,7 +529,7 @@ if (onnxruntime_USE_CUDA)
target_link_libraries(${target} PRIVATE cuda)
endif()

if (onnxruntime_USE_FLASH_ATTENTION)
if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
include(cutlass)
target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)
endif()
4 changes: 4 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
@@ -201,6 +201,10 @@ set(training_ops_excluded_files
"reduction/reduction_ops.cc" # no double type support
"cuda_training_kernels.cc"
"cuda_training_kernels.h"
"nn/conv_shared.cc"
"nn/conv_shared.h"
"nn/conv_transpose_grad.cc"
"nn/conv_transpose_grad.h"
)

function(auto_set_source_files_hip_language)
2 changes: 1 addition & 1 deletion csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.shared.cs
@@ -743,7 +743,7 @@ internal static OrtValue CreateFromTensorObject(TensorBase value, out TensorElem
/// <summary>
/// Creates an OrtValue that contains a string tensor of specified shape, and
/// containing empty strings. String tensors are always on CPU.
/// Use FillStringTensorElement to assign individual elements values.
/// Use StringTensorSetElementAt to assign individual elements values.
/// </summary>
/// <param name="allocator"></param>
/// <returns>disposable OrtValue</returns>
@@ -15,6 +15,7 @@ public struct OrtTrainingApi
public IntPtr LoadCheckpoint;
public IntPtr SaveCheckpoint;
public IntPtr CreateTrainingSession;
public IntPtr CreateTrainingSessionFromBuffer;
public IntPtr TrainingSessionGetTrainingModelOutputCount;
public IntPtr TrainingSessionGetEvalModelOutputCount;
public IntPtr TrainingSessionGetTrainingModelOutputName;
10 changes: 10 additions & 0 deletions docs/python/README.rst
@@ -8,6 +8,16 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://ak
Changes
-------

1.16.2
^^^^^^

Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.16.2

1.16.1
^^^^^^

Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.16.1

1.16.0
^^^^^^
