Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2911e70
add sliding window support for webgpu gqa (#25372)
guschmue Jul 17, 2025
2536acf
[TRT-EP] Add loadModelProto APIs (#25409)
kevinch-nv Jul 17, 2025
798d890
[NV RTX EP] Upstream changes from the win-ort (#25370)
ishwar-raut1 Jul 17, 2025
819f32f
Enable free dimension override for graph optimization level 0 (#25425)
chilo-ms Jul 17, 2025
e6c84b8
[CUDA] Support head_sink in flash attention for GQA (#25432)
tianleiwu Jul 17, 2025
fa50e23
[QNN EP] Add EP-aware Reshape handler for Transpose optimization. (#2…
minfhong-quic Jul 18, 2025
fef1b75
Revert "revert qnn sdk version (#25426)" (#25437)
qti-jkilpatrick Jul 18, 2025
e3f338c
[OVEP] Update OV version to 2025.2.0 (#25436)
sfatimar Jul 18, 2025
551fa6f
Ovep Feature Rel 1.23 (#25435)
sfatimar Jul 18, 2025
919cd0a
fix shape inference error for ep context nodes (#25398)
wcy123 Jul 18, 2025
12121aa
Update API ReadOpAttr and CreateOpAttr for string type to unblock EPs…
HectorSVC Jul 18, 2025
ff53f92
increase timeout for onnxruntime-ios-packaging-pipeline (#25438)
prathikr Jul 18, 2025
251eeb2
Bump on-headers and compression in /js/react_native/e2e (#25439)
dependabot[bot] Jul 18, 2025
1d00bff
Bump transformers from 4.48.0 to 4.52.1 in /tools/ci_build/requiremen…
dependabot[bot] Jul 18, 2025
f190e70
add webgpu support for GatherBlockQuantized (#25413)
guschmue Jul 18, 2025
7f193b1
[VitisAI] Upstream changes from win-ort (#25448)
nieubank Jul 19, 2025
8eea128
[WebNN] Fix some spelling and naming issues (#25433)
Honry Jul 19, 2025
9d11ae2
Plugin EP data transfer and Stream support. (#25254)
skottmckay Jul 19, 2025
033ca86
[NV RTX EP] Set Compute Capability only on Turing architecture (#25446)
keshavv27 Jul 19, 2025
b45edfb
[NvTensorRTRTX EP]Disable Fast GELU operator in base model used for N…
keshavv27 Jul 19, 2025
16701a2
[CANN]Fix issue with negative dynamic tensor shape (#25431)
bachelor-dou Jul 19, 2025
8642e8b
Merge branch 'master' into sync_ort_main_19_7_25
ankitm3k Jul 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/windows_openvino.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ jobs:
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Download OpenVINO Toolkit v2025.0.0
- name: Download OpenVINO Toolkit v2025.2.0
env:
OpenVINOVersion: 2025.0.0
OpenVINOVersion: 2025.2.0
shell: pwsh
run: |
$Url = "https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/windows/openvino_toolkit_windows_2025.0.0.17942.1f68be9f594_x86_64.zip"
$Url ="https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.2/windows/openvino_toolkit_windows_2025.2.0.19140.c01cd93e24d_x86_64.zip"
$OutputPath = "$env:RUNNER_TEMP\openvino.zip"
$ExtractPath = "$env:RUNNER_TEMP\openvino-v$env:OpenVINOVersion"
$TempExtractPath = "$env:RUNNER_TEMP\openvino_temp"
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
shell: pwsh
# Use $GITHUB_ENV to set the variable for subsequent steps
run: |
$openVinoRootDir = Join-Path $env:RUNNER_TEMP "openvino-v2025.0.0"
$openVinoRootDir = Join-Path $env:RUNNER_TEMP "openvino-v2025.2.0"
echo "OpenVINORootDir=$openVinoRootDir" >> $env:GITHUB_ENV

- name: Print OpenVINORootDir after downloading OpenVINO
Expand Down
15 changes: 9 additions & 6 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -498,17 +498,19 @@ set (ONNXRUNTIME_AUTOEP_TEST_SRC_DIR "${TEST_SRC_DIR}/autoep")
set (ONNXRUNTIME_EP_GRAPH_TEST_SRC_DIR "${TEST_SRC_DIR}/ep_graph")

set (onnxruntime_shared_lib_test_SRC
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_data_copy.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ort_format_models.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc)
)

if (NOT onnxruntime_MINIMAL_BUILD)
list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_inference.cc)
Expand Down Expand Up @@ -722,6 +724,7 @@ endif()
if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/qnn/*)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/qnn/qnn_node_group/*)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/qnn/optimizer/*)
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_qnn)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_qnn)
if(NOT onnxruntime_BUILD_QNN_EP_STATIC_LIB)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ struct OrtTensorRTProviderOptionsV2 {
size_t trt_onnx_bytestream_size{0}; // size of the byte stream provided as "trt_onnx_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue

const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
const void* trt_external_data_bytestream{nullptr}; // The byte stream containing the weights to override the ones provided in the ONNX model.
// can be updated using: UpdateTensorRTProviderOptionsWithValue
size_t trt_external_data_bytestream_size{0}; // size of the byte stream provided as "trt_external_data_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue
const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
int trt_load_user_initializer{0}; // Save initializers locally instead of to disk. Default 0 = false, nonzero = true
};
4 changes: 2 additions & 2 deletions include/onnxruntime/core/providers/utils/ort_graph_to_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -665,11 +665,11 @@ static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributePr
Ort::Status status{ort_api.ReadOpAttr(&ort_attr, attr_type, nullptr, 0, &total_attr_bytes)};
std::string* str = attr_proto.mutable_s();

str->resize(total_attr_bytes, '\0');
str->resize(total_attr_bytes);
ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.ReadOpAttr(&ort_attr, attr_type, str->data(), total_attr_bytes,
&total_attr_bytes));

str->resize(total_attr_bytes - 1); // remove extra ending terminating '\0' character.
str->resize(total_attr_bytes);
break;
}
case OrtOpAttrType::ORT_OP_ATTR_STRINGS: {
Expand Down
29 changes: 28 additions & 1 deletion include/onnxruntime/core/session/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "core/common/status.h"
#include "core/framework/allocator.h"
#include "core/framework/execution_provider.h"
#include "core/framework/data_transfer_manager.h"
#include "core/platform/device_discovery.h"
#include "core/platform/threadpool.h"

Expand Down Expand Up @@ -140,6 +141,10 @@
OrtDeviceMemoryType mem_type, OrtAllocatorType allocator_type,
const OrtKeyValuePairs* allocator_options, OrtAllocator** allocator);
Status ReleaseSharedAllocator(const OrtEpDevice& ep_device, OrtDeviceMemoryType mem_type);

// Read-only accessor: returns a const reference to the data_transfer_mgr_ member.
// Only declared when ORT_MINIMAL_BUILD is not defined (see the enclosing #if/#endif).
const DataTransferManager& GetDataTransferManager() const {
return data_transfer_mgr_;
}
#endif // !defined(ORT_MINIMAL_BUILD)

// return a shared allocator from a plugin EP or custom allocator added with RegisterAllocator
Expand Down Expand Up @@ -185,6 +190,23 @@

using OrtAllocatorUniquePtr = std::unique_ptr<OrtAllocator, std::function<void(OrtAllocator*)>>;

// If the user calls CreateSharedAllocator and wraps the plugin EP's allocator with an arena, we end up with an
// OrtAllocator from the EP -> wrapped in IAllocatorImplWrappingOrtAllocator -> inside a BFCArena IAllocator.
// We can put that in shared_allocators_ for sessions to use, but to have an OrtAllocator available in
// shared_ort_allocators_ that can be used outside of a session, we need to additionally wrap it in an
// OrtAllocatorImplWrappingIAllocator. That is far too many levels of indirection, but it is how things work today.
// Something needs to own that final OrtAllocator, so we add it to arena_ort_allocators_.
//
// TODO: we could split out the BFCArena implementation so it can be plugged into either an IAllocator

Check notice on line 200 in include/onnxruntime/core/session/environment.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] include/onnxruntime/core/session/environment.h#L200

Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
Raw output
include/onnxruntime/core/session/environment.h:200:  Missing username in TODO; it should look like "// TODO(my_username): Stuff."  [readability/todo] [2]
// or an OrtAllocator instance to reduce the indirection a little.
// With that we get an OrtAllocator from the EP, wrap it with an OrtAllocator-based BFCArena, and wrap that with the
// IAllocatorImplWrappingOrtAllocator, which takes ownership of the OrtAllocator and is in shared_allocators_.
//
// Alternatively we can disable wrapping an EP's allocator with a BFCArena and say the EP should provide the arena
// implementation directly. They're free to copy BFCArena as it came from TF originally. Or we could provide a
// cut-and-paste BFCArena implementation that works using the EP API that can be included in the EP source.
std::unordered_map<const OrtMemoryInfo*, std::unique_ptr<OrtAllocatorImplWrappingIAllocator>> arena_ort_allocators_;

#if !defined(ORT_MINIMAL_BUILD)
// register EPs that are built into the ORT binary so they can take part in AutoEP selection
// added to ep_libraries
Expand All @@ -207,7 +229,9 @@

std::unique_ptr<EpLibrary> library;
std::vector<std::unique_ptr<OrtEpDevice>> execution_devices;
std::vector<EpFactoryInternal*> internal_factories; // factories that can create IExecutionProvider instances
std::vector<OrtEpFactory*> factories;
std::vector<EpFactoryInternal*> internal_factories; // factories that can create IExecutionProvider instances
std::vector<plugin_ep::DataTransfer*> data_transfers; // data transfer instances for this EP.

private:
EpInfo() = default;
Expand All @@ -223,6 +247,9 @@

// lookup set for internal EPs so we can create an IExecutionProvider directly
std::unordered_set<EpFactoryInternal*> internal_ep_factories_;

DataTransferManager data_transfer_mgr_; // plugin EP IDataTransfer instances

#endif // !defined(ORT_MINIMAL_BUILD)
};

Expand Down
Loading
Loading