diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index feba60d58b9a2..06f805b6040d6 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -78,8 +78,8 @@ jobs: run: | set -e -x BINARY_SIZE_THRESHOLD_ARGS="" - echo "Binary size threshold in bytes: 1306224" - BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1306224" + echo "Binary size threshold in bytes: 1436672" + BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672" # Ensure ANDROID_NDK_HOME is available and get its real path if [ -z "$ANDROID_NDK_HOME" ]; then diff --git a/include/onnxruntime/core/framework/ortmemoryinfo.h b/include/onnxruntime/core/framework/ortmemoryinfo.h index d930b2289170d..1be81e77064d2 100644 --- a/include/onnxruntime/core/framework/ortmemoryinfo.h +++ b/include/onnxruntime/core/framework/ortmemoryinfo.h @@ -13,18 +13,14 @@ struct OrtMemoryInfo { OrtMemoryInfo() = default; // to allow default construction of Tensor // use string for name, so we could have customized allocator in execution provider. - const char* name = nullptr; + std::string name; OrtMemType mem_type = OrtMemTypeDefault; OrtAllocatorType alloc_type = OrtInvalidAllocator; OrtDevice device; - constexpr OrtMemoryInfo(const char* name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(), - OrtMemType mem_type_ = OrtMemTypeDefault) -#if ((defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__)) - // this causes a spurious error in CentOS gcc 4.8 build so disable if GCC version < 5 - __attribute__((nonnull)) -#endif - : name(name_), + OrtMemoryInfo(std::string name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(), + OrtMemType mem_type_ = OrtMemTypeDefault) + : name(std::move(name_)), mem_type(mem_type_), alloc_type(type_), device(device_) { @@ -39,7 +35,7 @@ struct OrtMemoryInfo { if (device != other.device) return device < other.device; - return strcmp(name, other.name) < 0; + return name < other.name; } // This is to make OrtMemoryInfo a valid key in hash tables @@ -68,7 +64,7 @@ inline bool operator==(const OrtMemoryInfo& left, const OrtMemoryInfo& other) { return left.mem_type == other.mem_type && left.alloc_type == other.alloc_type && left.device == other.device && - strcmp(left.name, other.name) == 0; + left.name == other.name; } inline bool operator!=(const OrtMemoryInfo& lhs, const OrtMemoryInfo& rhs) { return !(lhs == rhs); } diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc index e1b9d1294fb9e..91b5b811a3529 100644 --- a/onnxruntime/core/framework/allocator.cc +++ b/onnxruntime/core/framework/allocator.cc @@ -6,6 +6,7 @@ #include "core/common/safeint.h" #include "core/common/status.h" #include "core/framework/allocator.h" +#include "core/framework/error_code_helper.h" #include "core/mlas/inc/mlas.h" #include "core/framework/utils.h" #include "core/session/ort_apis.h" @@ -185,22 +186,32 @@ std::ostream& operator<<(std::ostream& out, const OrtMemoryInfo& info) { return #endif ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1, enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out) { + API_IMPL_BEGIN + + if (name1 == nullptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null."); + } + + if (out == nullptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null."); + } + auto device_id = static_cast(id1); if (strcmp(name1, onnxruntime::CPU) == 0) { *out = new OrtMemoryInfo(onnxruntime::CPU, type, OrtDevice(), mem_type1); } else if (strcmp(name1, onnxruntime::CUDA) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::CUDA, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NVIDIA, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::OpenVINO_GPU) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::OpenVINO_GPU, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::HIP) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::HIP, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::AMD, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::WEBGPU_BUFFER) == 0 || @@ -212,38 +223,39 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA } else if (strcmp(name1, onnxruntime::DML) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::DML, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::MICROSOFT, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::OpenVINO_RT_NPU) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::OpenVINO_RT_NPU, type, OrtDevice(OrtDevice::NPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::CUDA_PINNED) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::CUDA_PINNED, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::NVIDIA, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::HIP_PINNED) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::HIP_PINNED, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::AMD, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::QNN_HTP_SHARED) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::QNN_HTP_SHARED, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::QUALCOMM, device_id), mem_type1); } else if (strcmp(name1, onnxruntime::CPU_ALIGNED_4K) == 0) { *out = new OrtMemoryInfo( - name1, type, + onnxruntime::CPU_ALIGNED_4K, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, device_id, onnxruntime::kAlloc4KAlignment), mem_type1); } else { return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Specified device is not supported. Try CreateMemoryInfo_V2."); } + API_IMPL_END return nullptr; } @@ -251,6 +263,16 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ en _In_ uint32_t vendor_id, _In_ int32_t device_id, _In_ enum OrtDeviceMemoryType mem_type, _In_ size_t alignment, enum OrtAllocatorType type, _Outptr_ OrtMemoryInfo** out) { + API_IMPL_BEGIN + + if (name == nullptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null."); + } + + if (out == nullptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null."); + } + // map the public enum values to internal OrtDevice values OrtDevice::MemoryType mt = mem_type == OrtDeviceMemoryType_DEFAULT ? OrtDevice::MemType::DEFAULT : OrtDevice::MemType::HOST_ACCESSIBLE; @@ -275,6 +297,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ en *out = new OrtMemoryInfo(name, type, OrtDevice{dt, mt, vendor_id, narrow(device_id), alignment}, mem_type == OrtDeviceMemoryType_DEFAULT ? OrtMemTypeDefault : OrtMemTypeCPU); + API_IMPL_END return nullptr; } @@ -283,7 +306,7 @@ ORT_API(void, OrtApis::ReleaseMemoryInfo, _Frees_ptr_opt_ OrtMemoryInfo* p) { de #pragma warning(pop) #endif ORT_API_STATUS_IMPL(OrtApis::MemoryInfoGetName, _In_ const OrtMemoryInfo* ptr, _Out_ const char** out) { - *out = ptr->name; + *out = ptr->name.c_str(); return nullptr; } diff --git a/onnxruntime/core/framework/bfc_arena.cc b/onnxruntime/core/framework/bfc_arena.cc index e0b50cd04173e..3a5af42d03cdd 100644 --- a/onnxruntime/core/framework/bfc_arena.cc +++ b/onnxruntime/core/framework/bfc_arena.cc @@ -13,7 +13,7 @@ BFCArena::BFCArena(std::unique_ptr resource_allocator, int max_dead_bytes_per_chunk, int initial_growth_chunk_size_bytes, int64_t max_power_of_two_extend_bytes) - : IAllocator(OrtMemoryInfo(resource_allocator->Info().name, + : IAllocator(OrtMemoryInfo(resource_allocator->Info().name.c_str(), OrtAllocatorType::OrtArenaAllocator, resource_allocator->Info().device, resource_allocator->Info().mem_type)), diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp index 9611cb82d5a62..6d8d5453b9fc0 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp @@ -927,7 +927,7 @@ namespace Dml bool IsGpuTensor(const onnxruntime::Tensor& tensor) { - return strcmp(tensor.Location().name, onnxruntime::CPU) && + return strcmp(tensor.Location().name.c_str(), onnxruntime::CPU) && !(tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput || tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput); } diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp index c601ee3c1d5e6..fe52f27b35bb8 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp @@ -98,7 +98,7 @@ namespace Windows::AI::MachineLearning::Adapter bool IsAllocationInterface(const ::OrtMemoryInfo& info) { - return strcmp(info.name, onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput); + return strcmp(info.name.c_str(), onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput); } // Translate the data object stored in a tensor to the type which will be returned through @@ -1774,7 +1774,9 @@ namespace Windows::AI::MachineLearning::Adapter } // tells caller whether this tensor is in CPU memory - return !strcmp(m_impl->Location().name, onnxruntime::CPU) || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput; + return !strcmp(m_impl->Location().name.c_str(), onnxruntime::CPU) + || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput + || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput; } bool STDMETHODCALLTYPE TensorWrapper::IsDataInterface() const noexcept diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index d34d6724a1c91..772b92e4d8711 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -192,7 +192,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) { return tensor != nullptr && tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault && tensor->Location().device.Type() == OrtDevice::GPU && - !strcmp(tensor->Location().name, WEBGPU_BUFFER); + !strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER); }), "All inputs must be tensors on WebGPU buffers."); @@ -201,7 +201,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) { return tensor != nullptr && tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault && tensor->Location().device.Type() == OrtDevice::GPU && - !strcmp(tensor->Location().name, WEBGPU_BUFFER); + !strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER); }), "All outputs must be tensors on WebGPU buffers."); } diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc index 39b785c327d56..9c40eb75780ee 100644 --- a/onnxruntime/core/session/environment.cc +++ b/onnxruntime/core/session/environment.cc @@ -79,7 +79,7 @@ static bool AreOrtMemoryInfosEquivalent( bool ignore_alignment = false) { return left.mem_type == right.mem_type && (ignore_alignment ? left.device.EqualIgnoringAlignment(right.device) : left.device == right.device) && - (!match_name || strcmp(left.name, right.name) == 0); + (!match_name || left.name == right.name); } std::vector::const_iterator FindExistingAllocator(const std::vector& allocators, diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index 85ea958981e2c..124d748029fd4 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -53,11 +53,11 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) { static std::unique_ptr GetDataTransfer(const OrtMemoryInfo& mem_info) { std::unique_ptr data_transfer; - if (strcmp(mem_info.name, onnxruntime::CPU) == 0) { + if (mem_info.name == onnxruntime::CPU) { return data_transfer; } - if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) { + if (mem_info.name == onnxruntime::CUDA) { #if defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE) auto* cuda_provider_info = TryGetProviderInfo_CUDA(); if (cuda_provider_info != nullptr) { diff --git a/onnxruntime/test/framework/TestAllocatorManager.cc b/onnxruntime/test/framework/TestAllocatorManager.cc index 30f2686cd62f5..6440a805cdc59 100644 --- a/onnxruntime/test/framework/TestAllocatorManager.cc +++ b/onnxruntime/test/framework/TestAllocatorManager.cc @@ -10,7 +10,7 @@ namespace test { class DummyArena : public IAllocator { public: explicit DummyArena(std::unique_ptr resource_allocator) - : IAllocator(OrtMemoryInfo(resource_allocator->Info().name, + : IAllocator(OrtMemoryInfo(resource_allocator->Info().name.c_str(), OrtAllocatorType::OrtDeviceAllocator, resource_allocator->Info().device, resource_allocator->Info().mem_type)), diff --git a/onnxruntime/test/framework/allocator_test.cc b/onnxruntime/test/framework/allocator_test.cc index 3efba6f1b6e52..445e023746aaa 100644 --- a/onnxruntime/test/framework/allocator_test.cc +++ b/onnxruntime/test/framework/allocator_test.cc @@ -13,7 +13,7 @@ namespace test { TEST(AllocatorTest, CPUAllocatorTest) { auto cpu_arena = TestCPUExecutionProvider()->CreatePreferredAllocators()[0]; - ASSERT_STREQ(cpu_arena->Info().name, CPU); + ASSERT_STREQ(cpu_arena->Info().name.c_str(), CPU); EXPECT_EQ(cpu_arena->Info().device.Id(), 0); const auto expected_allocator_type = DoesCpuAllocatorSupportArenaUsage() diff --git a/onnxruntime/test/framework/tensor_test.cc b/onnxruntime/test/framework/tensor_test.cc index 2ac1a93013932..f08675271de21 100644 --- a/onnxruntime/test/framework/tensor_test.cc +++ b/onnxruntime/test/framework/tensor_test.cc @@ -29,7 +29,7 @@ void CPUTensorTest(std::vector dims, const int offset_elements = 0) { EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims()); EXPECT_EQ(t.DataType(), DataTypeImpl::GetType()); auto& location = t.Location(); - EXPECT_STREQ(location.name, CPU); + EXPECT_STREQ(location.name.c_str(), CPU); EXPECT_EQ(location.device.Id(), 0); const T* t_data = t.Data(); @@ -47,7 +47,7 @@ void CPUTensorTest(std::vector dims, const int offset_elements = 0) { EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims()); EXPECT_EQ(new_t.DataType(), DataTypeImpl::GetType()); auto& new_location = new_t.Location(); - ASSERT_STREQ(new_location.name, CPU); + ASSERT_STREQ(new_location.name.c_str(), CPU); EXPECT_EQ(new_location.device.Id(), 0); } } @@ -135,7 +135,7 @@ TEST(TensorTest, EmptyTensorTest) { EXPECT_TRUE(!data); auto& location = t.Location(); - ASSERT_STREQ(location.name, CPU); + ASSERT_STREQ(location.name.c_str(), CPU); EXPECT_EQ(location.device.Id(), 0); const auto expected_allocator_type = DoesCpuAllocatorSupportArenaUsage() @@ -160,7 +160,7 @@ TEST(TensorTest, StringTensorTest) { EXPECT_EQ(shape, tensor_shape); EXPECT_EQ(t.DataType(), DataTypeImpl::GetType()); auto& location = t.Location(); - ASSERT_STREQ(location.name, CPU); + ASSERT_EQ(location.name, CPU); EXPECT_EQ(location.device.Id(), 0); std::string* new_data = t.MutableData(); diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index e8291a36447ca..ecfaf34c8a076 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -216,7 +216,7 @@ TEST(LoraAdapterTest, VerifyDeviceCopy) { for (; begin != end; ++begin) { const auto& [_, param] = *begin; const auto& tensor_device = param.GetDeviceOrMapped().Get(); - ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::CUDA)); + ASSERT_EQ(0, strcmp(tensor_device.Location().name.c_str(), onnxruntime::CUDA)); const auto& tensor_cpu = param.GetMapped().Get(); ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size()); diff --git a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc index 91a4fe9a54251..af49bd0e3d58d 100644 --- a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc @@ -24,7 +24,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) { size_t size = 1024; - EXPECT_STREQ(cuda_arena->Info().name, CUDA); + EXPECT_STREQ(cuda_arena->Info().name.c_str(), CUDA); EXPECT_EQ(cuda_arena->Info().device.Id(), cuda_device_id); EXPECT_EQ(cuda_arena->Info().mem_type, OrtMemTypeDefault); EXPECT_EQ(cuda_arena->Info().alloc_type, OrtArenaAllocator); @@ -38,7 +38,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) { auto pinned_allocator = CreateAllocator(pinned_memory_info); - EXPECT_STREQ(pinned_allocator->Info().name, CUDA_PINNED); + EXPECT_STREQ(pinned_allocator->Info().name.c_str(), CUDA_PINNED); EXPECT_EQ(pinned_allocator->Info().device.Id(), 0); EXPECT_EQ(pinned_allocator->Info().mem_type, OrtMemTypeCPUOutput); EXPECT_EQ(pinned_allocator->Info().alloc_type, OrtArenaAllocator); @@ -50,7 +50,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) { AllocatorCreationInfo cpu_memory_info( [](int) { return std::make_unique(); }, true); const auto& cpu_arena = CreateAllocator(cpu_memory_info); - EXPECT_STREQ(cpu_arena->Info().name, CPU); + EXPECT_STREQ(cpu_arena->Info().name.c_str(), CPU); EXPECT_EQ(cpu_arena->Info().device.Id(), 0); EXPECT_EQ(cpu_arena->Info().mem_type, OrtMemTypeDefault); EXPECT_EQ(cpu_arena->Info().alloc_type, OrtArenaAllocator);