microsoft · hariharans29 · Jul 22, 2021 · Jun 15, 2021 · Jun 15, 2021 · Jun 16, 2021
diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h
@@ -5,6 +5,7 @@
 
 #include "core/common/common.h"
 #include "core/framework/fence.h"
+#include "core/framework/allocator_stats.h"
 #include "core/session/onnxruntime_c_api.h"
 #include "ortdevice.h"
 #include "ortmemoryinfo.h"
@@ -55,9 +56,23 @@ class IAllocator {
   @remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
   */
   virtual void* Alloc(size_t size) = 0;
+
   virtual void Free(void* p) = 0;
+
+  // TODO: Find a better name than Reserve() and update in all places.
+  // Reserve() is an interface exposed for an implementation of IAllocator
+  // to optionally implement some allocation logic that by-passes any arena-based
+  // logic that may be housed in the Alloc() implementation.
+  // There are SessionOptions config(s) that allow users to allocate some memory
+  // by-passing arena-based logic.
+  // By default, the base implementation just calls Alloc().
+  virtual void* Reserve(size_t size) { return Alloc(size); }
+
   const OrtMemoryInfo& Info() const { return memory_info_; };
 
+  // Each implementation of IAllocator can override and provide their own implementation
+  virtual void GetStats(AllocatorStats* /*stats*/) { return; }
+
   /**
      optional CreateFence interface, as provider like DML has its own fence
   */

diff --git a/include/onnxruntime/core/framework/tensor.h b/include/onnxruntime/core/framework/tensor.h
@@ -69,7 +69,7 @@ class Tensor final {
    * \param p_type Data type of the tensor
    * \param shape Shape of the tensor
    * \param p_data A preallocated buffer. Can be NULL if the shape is empty.
-   *              Tensor does not own the data and will not delete it
+   *              Tensor will own the memory and will delete it when the tensor instance is destructed.
    * \param deleter Allocator used to free the pre-allocated memory
    * \param offset Offset in bytes to start of Tensor within p_data. 
    */

diff --git a/include/onnxruntime/core/session/environment.h b/include/onnxruntime/core/session/environment.h
@@ -74,6 +74,11 @@ class Environment {
     return shared_allocators_;
   }
 
+  /**
+   * Removes an allocator that was previously registered for sharing between multiple sessions.
+  */
+  Status UnregisterAllocator(const OrtMemoryInfo& mem_info);
+
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Environment);
 

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -12,7 +12,7 @@
 #include <string.h>
 
 // This value is used in structures passed to ORT so that a newer version of ORT will still work with them
-#define ORT_API_VERSION 8
+#define ORT_API_VERSION 9
 
 #ifdef __cplusplus
 extern "C" {
@@ -675,9 +675,9 @@ struct OrtApi {
   ORT_API2_STATUS(AllocatorFree, _Inout_ OrtAllocator* ptr, void* p);
   ORT_API2_STATUS(AllocatorGetInfo, _In_ const OrtAllocator* ptr, _Outptr_ const struct OrtMemoryInfo** out);
 
+  // This API returns a CPU non-arena based allocator
   // The returned pointer doesn't have to be freed.
   // Always returns the same instance on every invocation.
-  // Please note that this is a non-arena based allocator.
   ORT_API2_STATUS(GetAllocatorWithDefaultOptions, _Outptr_ OrtAllocator** out);
 
   // Override symbolic dimensions (by specific denotation strings) with actual values if known at session initialization time to enable
@@ -1009,11 +1009,15 @@ struct OrtApi {
   ORT_API2_STATUS(AddSessionConfigEntry, _Inout_ OrtSessionOptions* options,
                   _In_z_ const char* config_key, _In_z_ const char* config_value);
 
-  /**
+  /** 
+   * This API returns an allocator bound to the provided OrtSession instance according 
+   * to the spec within mem_info if successful
    * \param sess valid OrtSession instance
    * \param mem_info - valid OrtMemoryInfo instance
-   * \param - out a ptr to a new instance of OrtAllocator according to the spec within mem_info
-   *         if successful
+   * \param out - a ptr to an instance of OrtAllocator which wraps the allocator
+   *        bound to the OrtSession instance.
+   *        Freeing the returned pointer only frees the OrtAllocator instance and not
+   *        the wrapped session-owned allocator itself.
    * \return OrtStatus or nullptr if successful
    */
   ORT_API2_STATUS(CreateAllocator, _In_ const OrtSession* sess, _In_ const OrtMemoryInfo* mem_info,
@@ -1124,7 +1128,8 @@ struct OrtApi {
    * sharing between multiple sessions that use the same env instance.
    * Lifetime of the created allocator will be valid for the duration of the environment.
    * Returns an error if an allocator with the same OrtMemoryInfo is already registered.
-   * \param mem_info must be non-null.
+   * \param env OrtEnv instance (must be non-null).
+   * \param mem_info (must be non-null).
    * \param arena_cfg if nullptr defaults will be used.
    * See docs/C_API.md for details.
   */
@@ -1390,7 +1395,7 @@ struct OrtApi {
                   _In_ const OrtSessionOptions* options, _Inout_ OrtPrepackedWeightsContainer* prepacked_weights_container,
                   _Outptr_ OrtSession** out);
 
-  /**
+  /*   
    * Append TensorRT execution provider to the session options with TensorRT provider options.
    * If TensorRT is not available (due to a non TensorRT enabled build), this function will return failure.
    * Note: this API is slightly different than SessionOptionsAppendExecutionProvider_TensorRT.
@@ -1425,9 +1430,9 @@ struct OrtApi {
   * \param num_keys - number of keys
   */
   ORT_API2_STATUS(UpdateTensorRTProviderOptions, _Inout_ OrtTensorRTProviderOptionsV2* tensorrt_options,
-                 _In_reads_(num_keys) const char* const* provider_options_keys,
-                 _In_reads_(num_keys) const char* const* provider_options_values,
-                 _In_ size_t num_keys);
+                  _In_reads_(num_keys) const char* const* provider_options_keys,
+                  _In_reads_(num_keys) const char* const* provider_options_values,
+                  _In_ size_t num_keys);
 
   /**
   * Get serialized TensorRT provider options string.
@@ -1446,10 +1451,32 @@ struct OrtApi {
   */
   ORT_CLASS_RELEASE2(TensorRTProviderOptions);
 
-  /**
+  /*
   * Enable custom operators in onnxruntime-extensions: https://github.com/microsoft/onnxruntime-extensions.git
   */
   ORT_API2_STATUS(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);
+
+  /**
+   * Registers a custom allocator instance with the env to enable
+   * sharing between multiple sessions that use the same env instance.
+   * Returns an error if an allocator with the same OrtMemoryInfo is already registered.
+   * \param env OrtEnv instance (must be non-null).
+   * \param allocator user provided allocator (must be non-null).
+   * The behavior of this API is exactly the same as CreateAndRegisterAllocator() except
+   * instead of ORT creating an allocator based on provided info, in this case 
+   * ORT uses the user-provided custom allocator.
+   * See docs/C_API.md for details.
+  */
+  ORT_API2_STATUS(RegisterAllocator, _Inout_ OrtEnv* env, _In_ OrtAllocator* allocator);
+
+  /**
+   * Unregisters a registered allocator for sharing across sessions 
+   * based on provided OrtMemoryInfo.
+   * It is an error if you provide an OrtMemoryInfo not corresponding to any
+   * registered allocators for sharing.
+  */
+  ORT_API2_STATUS(UnregisterAllocator, _Inout_ OrtEnv* env,
+                  _In_ const OrtMemoryInfo* mem_info);
 };
 
 /*

diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc
@@ -42,8 +42,6 @@ void* MiMallocAllocator::Alloc(size_t size) {
 void MiMallocAllocator::Free(void* p) {
   mi_free(p);
 }
-
-const OrtMemoryInfo& MiMallocAllocator::Info() const { return *memory_info_; }
 #endif
 
 void* CPUAllocator::Alloc(size_t size) {

diff --git a/onnxruntime/core/framework/arena.h → onnxruntime/core/framework/allocator_stats.h b/onnxruntime/core/framework/arena.h → onnxruntime/core/framework/allocator_stats.h
@@ -4,38 +4,9 @@
 #pragma once
 
 #include <string>
-
-#include "core/common/common.h"
-#include "core/framework/allocator.h"
+#include <sstream>
 
 namespace onnxruntime {
-// The interface for arena which manage memory allocations
-// Arena will hold a pool of pre-allocate memories and manage their lifecycle.
-// Need an underline IResourceAllocator to allocate memories.
-// The setting like max_chunk_size is init by IDeviceDescriptor from resource allocator
-class IArenaAllocator : public IAllocator {
- public:
-  IArenaAllocator(const OrtMemoryInfo& info) : IAllocator(info) {}
-  ~IArenaAllocator() override = default;
-  // Alloc call needs to be thread safe.
-  void* Alloc(size_t size) override = 0;
-  // The chunk allocated by Reserve call won't be reused with other request
-  // (i.e.) it is not maintained by the arena and
-  // it will be return to the devices when it is freed.
-  // Reserve call needs to be thread safe.
-  virtual void* Reserve(size_t size) = 0;
-  // Free call needs to be thread safe.
-  void Free(void* p) override = 0;
-  // All unused device allocations maintained by the arena
-  // (i.e.) physical allocations with no chunks in use will be de-allocated.
-  // Shrink call needs to be thread safe.
-  virtual Status Shrink() = 0;
-  virtual size_t Used() const = 0;
-  virtual size_t Max() const = 0;
-  // allocate host pinned memory?
-};
-
-using ArenaPtr = std::shared_ptr<IArenaAllocator>;
 
 // Runtime statistics collected by an allocator.
 struct AllocatorStats {

diff --git a/onnxruntime/core/framework/allocatormgr.cc b/onnxruntime/core/framework/allocatormgr.cc
@@ -3,7 +3,7 @@
 
 #include "core/framework/allocatormgr.h"
 #include "core/framework/bfc_arena.h"
-#include "core/framework/mimalloc_arena.h"
+#include "core/framework/mimalloc_allocator.h"
 #include "core/common/logging/logging.h"
 #include <mutex>
 #include <sstream>
@@ -48,11 +48,11 @@ AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info) {
         return nullptr;
     }
 
-#ifdef USE_MIMALLOC
-    return std::shared_ptr<IArenaAllocator>(
-        std::make_unique<MiMallocArena>(std::move(device_allocator), max_mem));
+#ifdef USE_MIMALLOC_ARENA_ALLOCATOR
+    return std::shared_ptr<IAllocator>(
+        std::make_unique<MiMallocAllocator>(max_mem));
 #else
-    return std::shared_ptr<IArenaAllocator>(
+    return std::shared_ptr<IAllocator>(
         std::make_unique<BFCArena>(std::move(device_allocator),
                                    max_mem,
                                    arena_extend_str,

diff --git a/onnxruntime/core/framework/allocatormgr.h b/onnxruntime/core/framework/allocatormgr.h
@@ -19,14 +19,14 @@ using MemoryInfoSet = std::set<OrtMemoryInfo>;
 const int DEFAULT_CPU_ALLOCATOR_DEVICE_ID = 0;
 
 struct AllocatorCreationInfo {
-  AllocatorCreationInfo(AllocatorFactory device_alloc_factory0,
-                        OrtDevice::DeviceId device_id0 = 0,
-                        bool use_arena0 = true,
-                        OrtArenaCfg arena_cfg0 = {0, -1, -1, -1, -1})
-      : device_alloc_factory(device_alloc_factory0),
-        device_id(device_id0),
-        use_arena(use_arena0),
-        arena_cfg(arena_cfg0) {
+  AllocatorCreationInfo(AllocatorFactory device_alloc_factory,
+                        OrtDevice::DeviceId device_id = 0,
+                        bool use_arena = true,
+                        OrtArenaCfg arena_cfg = {0, -1, -1, -1, -1})
+      : device_alloc_factory(device_alloc_factory),
+        device_id(device_id),
+        use_arena(use_arena),
+        arena_cfg(arena_cfg) {
   }
 
   AllocatorFactory device_alloc_factory;
@@ -35,7 +35,7 @@ struct AllocatorCreationInfo {
   OrtArenaCfg arena_cfg;
 };
 
-// Returns an allocator based on the creation info provided.
+// Returns an allocator (an instance of IAllocator) based on the creation info provided.
 // Returns nullptr if an invalid value of info.arena_cfg.arena_extend_strategy is supplied.
 // Valid values can be found in onnxruntime_c_api.h.
 AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info);

diff --git a/onnxruntime/core/framework/bfc_arena.cc b/onnxruntime/core/framework/bfc_arena.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "core/framework/allocator.h"
 #include "core/framework/bfc_arena.h"
 #include <type_traits>
 
@@ -11,11 +12,11 @@ BFCArena::BFCArena(std::unique_ptr<IAllocator> resource_allocator,
                    int initial_chunk_size_bytes,
                    int max_dead_bytes_per_chunk,
                    int initial_growth_chunk_size_bytes)
-    : IArenaAllocator(OrtMemoryInfo(resource_allocator->Info().name,
-                                    OrtAllocatorType::OrtArenaAllocator,
-                                    resource_allocator->Info().device,
-                                    resource_allocator->Info().id,
-                                    resource_allocator->Info().mem_type)),
+    : IAllocator(OrtMemoryInfo(resource_allocator->Info().name,
+                               OrtAllocatorType::OrtArenaAllocator,
+                               resource_allocator->Info().device,
+                               resource_allocator->Info().id,
+                               resource_allocator->Info().mem_type)),
       device_allocator_(std::move(resource_allocator)),
       free_chunks_list_(kInvalidChunkHandle),
       next_allocation_id_(1),

diff --git a/onnxruntime/core/framework/bfc_arena.h b/onnxruntime/core/framework/bfc_arena.h
@@ -28,8 +28,8 @@ limitations under the License.
 #include "core/common/safeint.h"
 
 #include "core/platform/ort_mutex.h"
-#include "core/framework/arena.h"
 #include "core/framework/arena_extend_strategy.h"
+#include "core/framework/allocator.h"
 
 #if defined(PLATFORM_WINDOWS)
 #include <intrin.h>
@@ -50,7 +50,7 @@ namespace onnxruntime {
 // coalescing.  One assumption we make is that the process using this
 // allocator owns pretty much all of the memory, and that nearly
 // all requests to allocate memory go through this interface.
-class BFCArena : public IArenaAllocator {
+class BFCArena : public IAllocator {
  public:
   static const ArenaExtendStrategy DEFAULT_ARENA_EXTEND_STRATEGY = ArenaExtendStrategy::kNextPowerOfTwo;
   static const int DEFAULT_INITIAL_CHUNK_SIZE_BYTES = 1 * 1024 * 1024;
@@ -81,24 +81,16 @@ class BFCArena : public IArenaAllocator {
   // `initial_growth_chunk_size_bytes_` but ultimately all
   // future allocation sizes are determined by the arena growth strategy
   // and the allocation request.
-  Status Shrink() override;
+  Status Shrink();
 
   void* Reserve(size_t size) override;
 
-  size_t Used() const override {
-    return static_cast<size_t>(stats_.bytes_in_use);
-  }
-
-  size_t Max() const override {
-    return memory_limit_;
-  }
-
   FencePtr CreateFence(const SessionState* session_state) override {
     // arena always rely on its device allocator to create fence
     return device_allocator_->CreateFence(session_state);
   }
 
-  void GetStats(AllocatorStats* stats);
+  void GetStats(AllocatorStats* stats) override;
 
   size_t RequestedSize(const void* ptr);