Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
6519034
Cuda Arena migration plan
yuslepukhin Apr 1, 2026
26fcaae
Update the design
yuslepukhin Apr 1, 2026
9dad919
Clarify IArena inhertance
yuslepukhin Apr 1, 2026
0027c19
Address review comments
yuslepukhin Apr 1, 2026
ad48120
Clarify Environment::CreateAndRegisterAllocatorV2()
yuslepukhin Apr 1, 2026
93850d9
Address review comments
yuslepukhin Apr 1, 2026
318edae
Re-design for a in-plugin arena using examples as a base
yuslepukhin Apr 2, 2026
b6973b6
Address review comments
yuslepukhin Apr 2, 2026
6748f7d
Re-work inheritance of Cuda Arean allocators
yuslepukhin Apr 2, 2026
2bcd8d3
Adjust CudaMempoolOrtAllocator
yuslepukhin Apr 2, 2026
4730e8d
Address review comments
yuslepukhin Apr 2, 2026
d335e7b
Address comments
yuslepukhin Apr 2, 2026
71c3ec5
Implement Phase I
yuslepukhin Apr 2, 2026
32f1fbc
lintrunner
yuslepukhin Apr 2, 2026
a19d9d3
Address review comments and make this build and test run. Phase I
yuslepukhin Apr 3, 2026
7b3bb5f
Address review comments
yuslepukhin Apr 3, 2026
a71b93a
Address comments
yuslepukhin Apr 4, 2026
1ea0d94
Address comments
yuslepukhin Apr 4, 2026
8f850a3
Address review comments
yuslepukhin Apr 4, 2026
27c3bc4
Integrate CudMempoolAllocator
yuslepukhin Apr 4, 2026
9c60d8a
Merge branch 'main' into yuslepukhin/cuda_arena_ep
yuslepukhin Apr 6, 2026
2cde673
Address review comments
yuslepukhin Apr 6, 2026
8f81a39
Address review comments, add public Reserve API, improve test coverage
yuslepukhin Apr 6, 2026
552d0e6
address comments
yuslepukhin Apr 6, 2026
700eb6c
Address review issues
yuslepukhin Apr 6, 2026
5a73a66
Add Shrink API
yuslepukhin Apr 6, 2026
c60b59b
Address review comments
yuslepukhin Apr 6, 2026
9961b56
Address review comments
yuslepukhin Apr 7, 2026
121d53b
Merge branch 'main' into yuslepukhin/cuda_arena_ep
yuslepukhin Apr 7, 2026
982eb6a
Add ArenaAllocator wrapper for Shrink and ReleaseStreamBuffers
yuslepukhin Apr 7, 2026
540962d
Address review comments
yuslepukhin Apr 7, 2026
6151008
Update docs
yuslepukhin Apr 7, 2026
9aebc8c
address review comments
yuslepukhin Apr 7, 2026
1c612cc
Address most recent comments
yuslepukhin Apr 7, 2026
da13dd5
Address compile issues. Add test.
yuslepukhin Apr 7, 2026
4eb4238
Merge branch 'main' into yuslepukhin/cuda_arena_ep
yuslepukhin Apr 8, 2026
e0204a8
Address review comments
yuslepukhin Apr 8, 2026
65769d5
Build error
yuslepukhin Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmake/onnxruntime_providers_cuda_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_plugin
${CUDA_PLUGIN_EP_CC_SRCS}
${CUDA_PLUGIN_EP_CU_SRCS}
)

# Mirror directory structure in the Visual Studio solution tree.
# source_group(TREE <root> FILES ...) derives the IDE folder of each file from its path relative to <root>.
# NOTE(review): the module above is built from CUDA_PLUGIN_EP_CC_SRCS / CUDA_PLUGIN_EP_CU_SRCS, while the
# lists grouped here are CUDA_EP_* and CUDA_CONTRIB_OPS_* -- confirm these are the intended lists and that
# every file in them is located under the root passed to the corresponding call.
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${CUDA_EP_CC_SRCS} ${CUDA_EP_CU_SRCS})
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${CUDA_CONTRIB_OPS_CC_SRCS} ${CUDA_CONTRIB_OPS_CU_SRCS})
Comment thread
yuslepukhin marked this conversation as resolved.
Outdated
# Keep the plugin CUDA target aligned with the repo-wide C++20 baseline.
# Forcing CUDA C++17 here breaks newer protobuf/absl headers used by the plugin
# build, as absl::compare expects standard ordering support in this configuration.
Expand Down
7 changes: 7 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,13 @@ if (onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_R
)
list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_src})

# Only when onnxruntime_BUILD_CUDA_EP_AS_PLUGIN is set: collect every .cc file directly under
# ${TEST_SRC_DIR}/providers/cuda/plugin and append it to the provider test sources.
# CONFIGURE_DEPENDS asks CMake to re-check the glob at build time so newly added files are picked up.
if (onnxruntime_BUILD_CUDA_EP_AS_PLUGIN)
file(GLOB onnxruntime_test_providers_cuda_plugin_src CONFIGURE_DEPENDS
"${TEST_SRC_DIR}/providers/cuda/plugin/*.cc"
)
list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_plugin_src})
endif()

if (onnxruntime_USE_CUDA_NHWC_OPS AND CUDNN_MAJOR_VERSION GREATER 8)
file(GLOB onnxruntime_test_providers_cuda_nhwc_src CONFIGURE_DEPENDS
"${TEST_SRC_DIR}/providers/cuda/nhwc/*.cc"
Expand Down
683 changes: 683 additions & 0 deletions docs/cuda_plugin_ep/arena_allocator_migration_design.md

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions onnxruntime/core/providers/cuda/plugin/cuda_allocator_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@

#include "cuda_plugin_utils.h"

#include <algorithm>
#include <sstream>
#include <string>
#include <type_traits>

namespace onnxruntime {
namespace cuda_plugin {

Expand All @@ -35,6 +40,51 @@ class CudaAllocatorBase : public OrtAllocator {
const OrtMemoryInfo* memory_info_;
};

// Compile-time guard: the build fails unless CudaAllocatorBase is a standard-layout type.
// NOTE(review): a class is standard-layout only if all non-static data members of the class and its
// bases are declared in a single class. CudaAllocatorBase appears to declare memory_info_ itself, so
// this can only hold if OrtAllocator contributes no non-static data members -- confirm the assertion
// compiles on all supported toolchains and that it expresses the intended address-equality guarantee.
static_assert(std::is_standard_layout_v<CudaAllocatorBase>,
"CudaAllocatorBase must be standard-layout so that OrtAllocator* and "
"CudaAllocatorBase* share the same address.");

/// Allocator statistics tracked by arena allocators.
/// A plain value type: every counter starts at zero, and the member functions only
/// format the current values -- they never modify them.
struct AllocatorStats {
  int64_t num_allocs = 0;             ///< Exported under the key "NumAllocs".
  int64_t num_reserves = 0;           ///< Exported under the key "NumReserves".
  int64_t num_arena_extensions = 0;   ///< Exported under the key "NumArenaExtensions".
  int64_t num_arena_shrinkages = 0;   ///< Exported under the key "NumArenaShrinkages".
  int64_t bytes_in_use = 0;           ///< Exported under the key "InUse".
  int64_t total_allocated_bytes = 0;  ///< Exported under the key "TotalAllocated".
  int64_t max_bytes_in_use = 0;       ///< Exported under the key "MaxInUse".
  int64_t max_alloc_size = 0;         ///< Exported under the key "MaxAllocSize".
  int64_t bytes_limit = 0;            ///< Exported under the key "Limit".

  /// Appends each statistic to `kvps` as a decimal string through `api.AddKeyValuePair`.
  /// Nothing is appended when `num_allocs` is not positive and `bytes_limit` is zero.
  /// @param api  ORT API table providing AddKeyValuePair.
  /// @param kvps Destination key/value collection; passed through to the API unchanged.
  void ToKeyValuePairs(const OrtApi& api, OrtKeyValuePairs* kvps) const {
    const bool nothing_to_report = num_allocs <= 0 && bytes_limit == 0;
    if (nothing_to_report) {
      return;
    }

    // Single place that performs the integer -> string conversion for every entry.
    const auto put = [&api, kvps](const char* key, int64_t value) {
      const std::string text = std::to_string(value);
      api.AddKeyValuePair(kvps, key, text.c_str());
    };

    put("Limit", bytes_limit);
    put("InUse", bytes_in_use);
    put("TotalAllocated", total_allocated_bytes);
    put("MaxInUse", max_bytes_in_use);
    put("NumAllocs", num_allocs);
    put("NumReserves", num_reserves);
    put("NumArenaExtensions", num_arena_extensions);
    put("NumArenaShrinkages", num_arena_shrinkages);
    put("MaxAllocSize", max_alloc_size);
  }

  /// Builds a multi-line "Label: value" dump of all statistics, one entry per line,
  /// each line terminated by a newline.
  /// @return The formatted text.
  std::string DebugString() const {
    std::ostringstream out;
    const auto line = [&out](const char* label, int64_t value) {
      out << label << value << "\n";
    };

    line("Limit: ", bytes_limit);
    line("InUse: ", bytes_in_use);
    line("TotalAllocated: ", total_allocated_bytes);
    line("MaxInUse: ", max_bytes_in_use);
    line("NumAllocs: ", num_allocs);
    line("NumReserves: ", num_reserves);
    line("NumArenaExtensions: ", num_arena_extensions);
    line("NumArenaShrinkages: ", num_arena_shrinkages);
    line("MaxAllocSize: ", max_alloc_size);
    return out.str();
  }
};

/// CUDA device memory allocator using cudaMalloc/cudaFree.
/// Lifetime is managed by the EP factory (ReleaseAllocatorImpl), not by a Release callback.
class CudaDeviceAllocator final : public CudaAllocatorBase {
Expand Down
Loading
Loading