Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions include/onnxruntime/core/framework/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "core/common/common.h"
#include "core/framework/fence.h"
#include "core/framework/allocator_stats.h"
#include "core/session/onnxruntime_c_api.h"
#include "ortdevice.h"
#include "ortmemoryinfo.h"
Expand Down Expand Up @@ -55,9 +56,23 @@ class IAllocator {
@remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
*/
virtual void* Alloc(size_t size) = 0;

virtual void Free(void* p) = 0;

// TODO: Find a better name than Reserve() and update in all places.
// Reserve() is an interface exposed for an implementation of IAllocator
// to optionally implement some allocation logic that by-passes any arena-based
// logic that may be housed in the Alloc() implementation.
// There are SessionOptions config(s) that allow users to allocate some memory
// by-passing arena-based logic.
// By default, the base implementation just calls Alloc().
virtual void* Reserve(size_t size) { return Alloc(size); }

const OrtMemoryInfo& Info() const { return memory_info_; };

// Each implementation of IAllocator can override and provide their own implementation
virtual void GetStats(AllocatorStats* /*stats*/) { return; }

/**
Optional CreateFence interface, since some providers (e.g. DML) have their own fence.
*/
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/framework/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class Tensor final {
* \param p_type Data type of the tensor
* \param shape Shape of the tensor
* \param p_data A preallocated buffer. Can be NULL if the shape is empty.
* Tensor does not own the data and will not delete it
* Tensor will own the memory and will delete it when the tensor instance is destructed.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment was wrong - fixing it

* \param deleter Allocator used to free the pre-allocated memory
* \param offset Offset in bytes to start of Tensor within p_data.
*/
Expand Down
5 changes: 5 additions & 0 deletions include/onnxruntime/core/session/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ class Environment {
return shared_allocators_;
}

/**
* Removes registered allocator that was previously registered for sharing between multiple sessions.
*/
Status UnregisterAllocator(const OrtMemoryInfo& mem_info);

private:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Environment);

Expand Down
49 changes: 38 additions & 11 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <string.h>

// This value is used in structures passed to ORT so that a newer version of ORT will still work with them
#define ORT_API_VERSION 8
#define ORT_API_VERSION 9

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -675,9 +675,9 @@ struct OrtApi {
ORT_API2_STATUS(AllocatorFree, _Inout_ OrtAllocator* ptr, void* p);
ORT_API2_STATUS(AllocatorGetInfo, _In_ const OrtAllocator* ptr, _Outptr_ const struct OrtMemoryInfo** out);

// This API returns a CPU non-arena based allocator
// The returned pointer doesn't have to be freed.
// Always returns the same instance on every invocation.
// Please note that this is a non-arena based allocator.
ORT_API2_STATUS(GetAllocatorWithDefaultOptions, _Outptr_ OrtAllocator** out);

// Override symbolic dimensions (by specific denotation strings) with actual values if known at session initialization time to enable
Expand Down Expand Up @@ -1009,11 +1009,15 @@ struct OrtApi {
ORT_API2_STATUS(AddSessionConfigEntry, _Inout_ OrtSessionOptions* options,
_In_z_ const char* config_key, _In_z_ const char* config_value);

/**
/**
* This API returns an allocator bound to the provided OrtSession instance according
* to the spec within mem_info if successful
* \param sess valid OrtSession instance
* \param mem_info - valid OrtMemoryInfo instance
* \param - out a ptr to a new instance of OrtAllocator according to the spec within mem_info
* if successful
* \param out - a ptr to an instance of OrtAllocator which wraps the allocator
bound to the OrtSession instance
Freeing the returned pointer only frees the OrtAllocator instance and not
the wrapped session owned allocator itself.
* \return OrtStatus or nullptr if successful
*/
ORT_API2_STATUS(CreateAllocator, _In_ const OrtSession* sess, _In_ const OrtMemoryInfo* mem_info,
Expand Down Expand Up @@ -1124,7 +1128,8 @@ struct OrtApi {
* sharing between multiple sessions that use the same env instance.
* Lifetime of the created allocator will be valid for the duration of the environment.
* Returns an error if an allocator with the same OrtMemoryInfo is already registered.
* \param mem_info must be non-null.
* \param env OrtEnv instance (must be non-null).
* \param mem_info (must be non-null).
* \param arena_cfg if nullptr defaults will be used.
* See docs/C_API.md for details.
*/
Expand Down Expand Up @@ -1390,7 +1395,7 @@ struct OrtApi {
_In_ const OrtSessionOptions* options, _Inout_ OrtPrepackedWeightsContainer* prepacked_weights_container,
_Outptr_ OrtSession** out);

/**
/*
* Append TensorRT execution provider to the session options with TensorRT provider options.
* If TensorRT is not available (due to a non TensorRT enabled build), this function will return failure.
* Note: this API is slightly different than SessionOptionsAppendExecutionProvider_TensorRT.
Expand Down Expand Up @@ -1425,9 +1430,9 @@ struct OrtApi {
* \param num_keys - number of keys
*/
ORT_API2_STATUS(UpdateTensorRTProviderOptions, _Inout_ OrtTensorRTProviderOptionsV2* tensorrt_options,
_In_reads_(num_keys) const char* const* provider_options_keys,
_In_reads_(num_keys) const char* const* provider_options_values,
_In_ size_t num_keys);
_In_reads_(num_keys) const char* const* provider_options_keys,
_In_reads_(num_keys) const char* const* provider_options_values,
_In_ size_t num_keys);

/**
* Get serialized TensorRT provider options string.
Expand All @@ -1446,10 +1451,32 @@ struct OrtApi {
*/
ORT_CLASS_RELEASE2(TensorRTProviderOptions);

/**
/*
* Enable custom operators in onnxruntime-extensions: https://github.com/microsoft/onnxruntime-extensions.git
*/
ORT_API2_STATUS(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);

/**
* Registers a custom allocator instance with the env to enable
* sharing between multiple sessions that use the same env instance.
* Returns an error if an allocator with the same OrtMemoryInfo is already registered.
* \param env OrtEnv instance (must be non-null).
* \param allocator user provided allocator (must be non-null).
* The behavior of this API is exactly the same as CreateAndRegisterAllocator() except
* instead of ORT creating an allocator based on provided info, in this case
* ORT uses the user-provided custom allocator.
* See docs/C_API.md for details.
*/
ORT_API2_STATUS(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);

/**
* Unregisters a registered allocator for sharing across sessions
* based on provided OrtMemoryInfo.
* It is an error if you provide an OrtMemoryInfo not corresponding to any
* registered allocators for sharing.
*/
ORT_API2_STATUS(UnregisterAllocator, _Inout_ OrtEnv* env,
_In_ const OrtMemoryInfo* mem_info);
};

/*
Expand Down
2 changes: 0 additions & 2 deletions onnxruntime/core/framework/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ void* MiMallocAllocator::Alloc(size_t size) {
// Releases the block pointed to by p by forwarding it directly to mi_free().
void MiMallocAllocator::Free(void* p) {
mi_free(p);
}

const OrtMemoryInfo& MiMallocAllocator::Info() const { return *memory_info_; }
#endif

void* CPUAllocator::Alloc(size_t size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,9 @@
#pragma once

#include <string>

#include "core/common/common.h"
#include "core/framework/allocator.h"
#include <sstream>

namespace onnxruntime {
// The interface for arena which manage memory allocations
// Arena will hold a pool of pre-allocated memory and manage its lifecycle.
// Needs an underlying IResourceAllocator to allocate memory.
// Settings like max_chunk_size are initialized by IDeviceDescriptor from the resource allocator.
class IArenaAllocator : public IAllocator {
public:
IArenaAllocator(const OrtMemoryInfo& info) : IAllocator(info) {}
~IArenaAllocator() override = default;
// Alloc call needs to be thread safe.
void* Alloc(size_t size) override = 0;
// The chunk allocated by Reserve call won't be reused with other request
// (i.e.) it is not maintained by the arena and
// it will be returned to the device when it is freed.
// Reserve call needs to be thread safe.
virtual void* Reserve(size_t size) = 0;
// Free call needs to be thread safe.
void Free(void* p) override = 0;
// All unused device allocations maintained by the arena
// (i.e.) physical allocations with no chunks in use will be de-allocated.
// Shrink call needs to be thread safe.
virtual Status Shrink() = 0;
virtual size_t Used() const = 0;
virtual size_t Max() const = 0;
// allocate host pinned memory?
};

using ArenaPtr = std::shared_ptr<IArenaAllocator>;

// Runtime statistics collected by an allocator.
struct AllocatorStats {
Expand Down
10 changes: 5 additions & 5 deletions onnxruntime/core/framework/allocatormgr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "core/framework/allocatormgr.h"
#include "core/framework/bfc_arena.h"
#include "core/framework/mimalloc_arena.h"
#include "core/framework/mimalloc_allocator.h"
#include "core/common/logging/logging.h"
#include <mutex>
#include <sstream>
Expand Down Expand Up @@ -48,11 +48,11 @@ AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info) {
return nullptr;
}

#ifdef USE_MIMALLOC
return std::shared_ptr<IArenaAllocator>(
std::make_unique<MiMallocArena>(std::move(device_allocator), max_mem));
#ifdef USE_MIMALLOC_ARENA_ALLOCATOR
return std::shared_ptr<IAllocator>(
std::make_unique<MiMallocAllocator>(max_mem));
#else
return std::shared_ptr<IArenaAllocator>(
return std::shared_ptr<IAllocator>(
std::make_unique<BFCArena>(std::move(device_allocator),
max_mem,
arena_extend_str,
Expand Down
18 changes: 9 additions & 9 deletions onnxruntime/core/framework/allocatormgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ using MemoryInfoSet = std::set<OrtMemoryInfo>;
const int DEFAULT_CPU_ALLOCATOR_DEVICE_ID = 0;

struct AllocatorCreationInfo {
AllocatorCreationInfo(AllocatorFactory device_alloc_factory0,
OrtDevice::DeviceId device_id0 = 0,
bool use_arena0 = true,
OrtArenaCfg arena_cfg0 = {0, -1, -1, -1, -1})
: device_alloc_factory(device_alloc_factory0),
device_id(device_id0),
use_arena(use_arena0),
arena_cfg(arena_cfg0) {
AllocatorCreationInfo(AllocatorFactory device_alloc_factory,
OrtDevice::DeviceId device_id = 0,
bool use_arena = true,
OrtArenaCfg arena_cfg = {0, -1, -1, -1, -1})
: device_alloc_factory(device_alloc_factory),
device_id(device_id),
use_arena(use_arena),
arena_cfg(arena_cfg) {
}

AllocatorFactory device_alloc_factory;
Expand All @@ -35,7 +35,7 @@ struct AllocatorCreationInfo {
OrtArenaCfg arena_cfg;
};

// Returns an allocator based on the creation info provided.
// Returns an allocator (an instance of IAllocator) based on the creation info provided.
// Returns nullptr if an invalid value of info.arena_cfg.arena_extend_strategy is supplied.
// Valid values can be found in onnxruntime_c_api.h.
AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info);
Expand Down
11 changes: 6 additions & 5 deletions onnxruntime/core/framework/bfc_arena.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/framework/allocator.h"
#include "core/framework/bfc_arena.h"
#include <type_traits>

Expand All @@ -11,11 +12,11 @@ BFCArena::BFCArena(std::unique_ptr<IAllocator> resource_allocator,
int initial_chunk_size_bytes,
int max_dead_bytes_per_chunk,
int initial_growth_chunk_size_bytes)
: IArenaAllocator(OrtMemoryInfo(resource_allocator->Info().name,
OrtAllocatorType::OrtArenaAllocator,
resource_allocator->Info().device,
resource_allocator->Info().id,
resource_allocator->Info().mem_type)),
: IAllocator(OrtMemoryInfo(resource_allocator->Info().name,
OrtAllocatorType::OrtArenaAllocator,
resource_allocator->Info().device,
resource_allocator->Info().id,
resource_allocator->Info().mem_type)),
device_allocator_(std::move(resource_allocator)),
free_chunks_list_(kInvalidChunkHandle),
next_allocation_id_(1),
Expand Down
16 changes: 4 additions & 12 deletions onnxruntime/core/framework/bfc_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ limitations under the License.
#include "core/common/safeint.h"

#include "core/platform/ort_mutex.h"
#include "core/framework/arena.h"
#include "core/framework/arena_extend_strategy.h"
#include "core/framework/allocator.h"

#if defined(PLATFORM_WINDOWS)
#include <intrin.h>
Expand All @@ -50,7 +50,7 @@ namespace onnxruntime {
// coalescing. One assumption we make is that the process using this
// allocator owns pretty much all of the memory, and that nearly
// all requests to allocate memory go through this interface.
class BFCArena : public IArenaAllocator {
class BFCArena : public IAllocator {
public:
static const ArenaExtendStrategy DEFAULT_ARENA_EXTEND_STRATEGY = ArenaExtendStrategy::kNextPowerOfTwo;
static const int DEFAULT_INITIAL_CHUNK_SIZE_BYTES = 1 * 1024 * 1024;
Expand Down Expand Up @@ -81,24 +81,16 @@ class BFCArena : public IArenaAllocator {
// `initial_growth_chunk_size_bytes_` but ultimately all
// future allocation sizes are determined by the arena growth strategy
// and the allocation request.
Status Shrink() override;
Status Shrink();

void* Reserve(size_t size) override;

size_t Used() const override {
return static_cast<size_t>(stats_.bytes_in_use);
}

size_t Max() const override {
return memory_limit_;
}

// Creates a fence for the given session state by delegating to the wrapped device allocator.
FencePtr CreateFence(const SessionState* session_state) override {
// The arena always relies on its device allocator to create the fence.
return device_allocator_->CreateFence(session_state);
}

void GetStats(AllocatorStats* stats);
void GetStats(AllocatorStats* stats) override;

size_t RequestedSize(const void* ptr);

Expand Down
Loading