diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
index 8df0064e06a1d..87f93cd0f6ea7 100644
--- a/.github/workflows/android.yml
+++ b/.github/workflows/android.yml
@@ -78,8 +78,8 @@ jobs:
       run: |
         set -e -x
         BINARY_SIZE_THRESHOLD_ARGS=""
-        echo "Binary size threshold in bytes: 1306224"
-        BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1306224"
+        echo "Binary size threshold in bytes: 1436672"
+        BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"

         # Ensure ANDROID_NDK_HOME is available and get its real path
         if [ -z "$ANDROID_NDK_HOME" ]; then
diff --git a/include/onnxruntime/core/framework/ortmemoryinfo.h b/include/onnxruntime/core/framework/ortmemoryinfo.h
index d930b2289170d..1be81e77064d2 100644
--- a/include/onnxruntime/core/framework/ortmemoryinfo.h
+++ b/include/onnxruntime/core/framework/ortmemoryinfo.h
@@ -13,18 +13,14 @@ struct OrtMemoryInfo {
   OrtMemoryInfo() = default;  // to allow default construction of Tensor

   // use string for name, so we could have customized allocator in execution provider.
-  const char* name = nullptr;
+  std::string name;
   OrtMemType mem_type = OrtMemTypeDefault;
   OrtAllocatorType alloc_type = OrtInvalidAllocator;
   OrtDevice device;

-  constexpr OrtMemoryInfo(const char* name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(),
-                          OrtMemType mem_type_ = OrtMemTypeDefault)
-#if ((defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__))
-      // this causes a spurious error in CentOS gcc 4.8 build so disable if GCC version < 5
-      __attribute__((nonnull))
-#endif
-      : name(name_),
+  OrtMemoryInfo(std::string name_, OrtAllocatorType type_, OrtDevice device_ = OrtDevice(),
+                OrtMemType mem_type_ = OrtMemTypeDefault)
+      : name(std::move(name_)),
         mem_type(mem_type_),
         alloc_type(type_),
         device(device_) {
@@ -39,7 +35,7 @@ struct OrtMemoryInfo {
     if (device != other.device) return device < other.device;

-    return strcmp(name, other.name) < 0;
+    return name < other.name;
   }

   // This is to make OrtMemoryInfo a valid key in hash tables
@@ -68,7 +64,7 @@ inline bool operator==(const OrtMemoryInfo& left, const OrtMemoryInfo& other) {
   return left.mem_type == other.mem_type &&
          left.alloc_type == other.alloc_type &&
          left.device == other.device &&
-         strcmp(left.name, other.name) == 0;
+         left.name == other.name;
 }

 inline bool operator!=(const OrtMemoryInfo& lhs, const OrtMemoryInfo& rhs) { return !(lhs == rhs); }
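Note on the ortmemoryinfo.h change: switching `name` from `const char*` to an owned `std::string` removes the lifetime coupling to the caller's buffer and the `strcmp` null-pointer hazard, and it drives most of the call-site churn below (adding `.c_str()` at `strcmp` sites, or dropping `strcmp` for plain comparisons). A minimal sketch of the new comparison semantics, using a trimmed stand-in struct rather than the real header:

```cpp
#include <cassert>
#include <string>
#include <utility>

// Trimmed, hypothetical stand-in for the new OrtMemoryInfo shape; only the
// name member and the comparison logic this diff changes are reproduced.
struct MemoryInfoLike {
  std::string name;
  explicit MemoryInfoLike(std::string name_) : name(std::move(name_)) {}

  // Mirrors the hunks above: strcmp(name, other.name) < 0 / == 0 become
  // plain value comparisons on std::string.
  bool operator<(const MemoryInfoLike& other) const { return name < other.name; }
  bool operator==(const MemoryInfoLike& other) const { return name == other.name; }
};

int main() {
  // Equal contents held in distinct buffers compare equal, with no dependence
  // on the caller's buffer outliving the object and no strcmp(nullptr, ...) UB.
  char buf[] = "Cpu";
  MemoryInfoLike a{buf};
  MemoryInfoLike b{std::string("Cpu")};
  assert(a == b);

  MemoryInfoLike c{"Gpu"};
  assert(a < c && !(a == c));  // ordering is lexicographic, as with strcmp
  return 0;
}
```

The trade-off, visible in the hunk, is that the constructor is no longer `constexpr` and construction may allocate; in exchange, the gcc 4.8 `__attribute__((nonnull))` workaround becomes unnecessary.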
diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc
index e1b9d1294fb9e..91b5b811a3529 100644
--- a/onnxruntime/core/framework/allocator.cc
+++ b/onnxruntime/core/framework/allocator.cc
@@ -6,6 +6,7 @@
 #include "core/common/safeint.h"
 #include "core/common/status.h"
 #include "core/framework/allocator.h"
+#include "core/framework/error_code_helper.h"
 #include "core/mlas/inc/mlas.h"
 #include "core/framework/utils.h"
 #include "core/session/ort_apis.h"
@@ -185,22 +186,32 @@ std::ostream& operator<<(std::ostream& out, const OrtMemoryInfo& info) { return
 #endif
 ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1,
                     enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out) {
+  API_IMPL_BEGIN
+
+  if (name1 == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null.");
+  }
+
+  if (out == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null.");
+  }
+
   auto device_id = static_cast<OrtDevice::DeviceId>(id1);
   if (strcmp(name1, onnxruntime::CPU) == 0) {
     *out = new OrtMemoryInfo(onnxruntime::CPU, type, OrtDevice(), mem_type1);
   } else if (strcmp(name1, onnxruntime::CUDA) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::CUDA, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NVIDIA, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::OpenVINO_GPU) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::OpenVINO_GPU, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::HIP) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::HIP, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::AMD, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::WEBGPU_BUFFER) == 0 ||
@@ -212,38 +223,39 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA
   } else if (strcmp(name1, onnxruntime::DML) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::DML, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::MICROSOFT, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::OpenVINO_RT_NPU) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::OpenVINO_RT_NPU, type,
         OrtDevice(OrtDevice::NPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::INTEL, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::CUDA_PINNED) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::CUDA_PINNED, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::NVIDIA, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::HIP_PINNED) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::HIP_PINNED, type,
         OrtDevice(OrtDevice::GPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::AMD, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::QNN_HTP_SHARED) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::QNN_HTP_SHARED, type,
         OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HOST_ACCESSIBLE, OrtDevice::VendorIds::QUALCOMM, device_id),
         mem_type1);
   } else if (strcmp(name1, onnxruntime::CPU_ALIGNED_4K) == 0) {
     *out = new OrtMemoryInfo(
-        name1, type,
+        onnxruntime::CPU_ALIGNED_4K, type,
         OrtDevice(OrtDevice::CPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, device_id,
                   onnxruntime::kAlloc4KAlignment),
         mem_type1);
   } else {
     return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Specified device is not supported. Try CreateMemoryInfo_V2.");
   }
+  API_IMPL_END
   return nullptr;
 }
@@ -251,6 +263,16 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ en
                     _In_ uint32_t vendor_id, _In_ int32_t device_id,
                     _In_ enum OrtDeviceMemoryType mem_type, _In_ size_t alignment,
                     enum OrtAllocatorType type, _Outptr_ OrtMemoryInfo** out) {
+  API_IMPL_BEGIN
+
+  if (name == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "MemoryInfo name cannot be null.");
+  }
+
+  if (out == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Output memory info cannot be null.");
+  }
+
   // map the public enum values to internal OrtDevice values
   OrtDevice::MemoryType mt = mem_type == OrtDeviceMemoryType_DEFAULT ? OrtDevice::MemType::DEFAULT
                                                                      : OrtDevice::MemType::HOST_ACCESSIBLE;
@@ -275,6 +297,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo_V2, _In_ const char* name, _In_ en
   *out = new OrtMemoryInfo(name, type, OrtDevice{dt, mt, vendor_id, narrow<OrtDevice::DeviceId>(device_id), alignment},
                            mem_type == OrtDeviceMemoryType_DEFAULT ? OrtMemTypeDefault : OrtMemTypeCPU);
+  API_IMPL_END
   return nullptr;
 }
@@ -283,7 +306,7 @@ ORT_API(void, OrtApis::ReleaseMemoryInfo, _Frees_ptr_opt_ OrtMemoryInfo* p) { de
 #pragma warning(pop)
 #endif
 ORT_API_STATUS_IMPL(OrtApis::MemoryInfoGetName, _In_ const OrtMemoryInfo* ptr, _Out_ const char** out) {
-  *out = ptr->name;
+  *out = ptr->name.c_str();
   return nullptr;
 }
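Note on the allocator.cc change: `CreateMemoryInfo` and `CreateMemoryInfo_V2` now validate their arguments and run inside `API_IMPL_BEGIN`/`API_IMPL_END` (hence the new `error_code_helper.h` include), so bad input surfaces as an error status instead of undefined behavior in `strcmp`. A minimal caller-side sketch against the public C API, assuming the error text matches the strings added above:

```cpp
#include <cstdio>
#include "onnxruntime_c_api.h"

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  // Happy path: a known name ("Cpu" is onnxruntime::CPU) succeeds as before.
  OrtMemoryInfo* mi = nullptr;
  OrtStatus* st = api->CreateMemoryInfo("Cpu", OrtArenaAllocator, 0, OrtMemTypeDefault, &mi);
  if (st == nullptr) {
    api->ReleaseMemoryInfo(mi);
  }

  // Previously a null name reached strcmp(nullptr, ...); now it is a clean error.
  st = api->CreateMemoryInfo(nullptr, OrtArenaAllocator, 0, OrtMemTypeDefault, &mi);
  if (st != nullptr) {
    std::printf("%s\n", api->GetErrorMessage(st));  // "MemoryInfo name cannot be null."
    api->ReleaseStatus(st);
  }
  return 0;
}
```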
diff --git a/onnxruntime/core/framework/bfc_arena.cc b/onnxruntime/core/framework/bfc_arena.cc
index e0b50cd04173e..3a5af42d03cdd 100644
--- a/onnxruntime/core/framework/bfc_arena.cc
+++ b/onnxruntime/core/framework/bfc_arena.cc
@@ -13,7 +13,7 @@ BFCArena::BFCArena(std::unique_ptr<IAllocator> resource_allocator,
                    int max_dead_bytes_per_chunk,
                    int initial_growth_chunk_size_bytes,
                    int64_t max_power_of_two_extend_bytes)
-    : IAllocator(OrtMemoryInfo(resource_allocator->Info().name,
+    : IAllocator(OrtMemoryInfo(resource_allocator->Info().name.c_str(),
                                OrtAllocatorType::OrtArenaAllocator,
                                resource_allocator->Info().device,
                                resource_allocator->Info().mem_type)),
diff --git a/onnxruntime/core/mlas/lib/qnbitgemm.h b/onnxruntime/core/mlas/lib/qnbitgemm.h
index 06e8e49b59e2e..7ec80c6d67f15 100644
--- a/onnxruntime/core/mlas/lib/qnbitgemm.h
+++ b/onnxruntime/core/mlas/lib/qnbitgemm.h
@@ -53,16 +53,25 @@ struct PackedQuantBDataStruct {
     {
         const size_t PackedQuantBDataSize = N * BlockCountK * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
         size_t BlkSumSize = MlasDivRoundup(N, 16) * BlockCountK * 16 * sizeof(T);
-        if constexpr (BlkBitWidth == 8) {
-            PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 32);
-        } else {
 #if defined(MLAS_TARGET_AMD64_IX86)
         // avx512 requires alignment on a 64-byte boundary
         PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 64);
+#elif defined(MLAS_TARGET_ARM64)
+        // Only for 8-bit Gemms does `PackedQuantBData` need to be 32-byte aligned, and
+        // there is enough memory allocated to support this alignment.
+        // See QNBitGemmPackQuantBDataSize().
+        // When the bit width is 4, there is no alignment guarantee.
+        // TODO(hasesh): Can we unify the alignment for 4-bit and 8-bit ARM64 Gemms so as to
+        // simplify this logic and make the code here cleaner?
+        if constexpr (BlkBitWidth == 8) {
+            PackedQuantBData = (std::byte*)MlasAlignAddress(PackedQuantBWorkspace, 32);
+        } else {
+            PackedQuantBData = (std::byte*)PackedQuantBWorkspace;
+        }
 #else
         PackedQuantBData = (std::byte*)PackedQuantBWorkspace;
 #endif
-        }
         QuantBBlkSum = (T*)(PackedQuantBData + PackedQuantBDataSize);
         QuantBBlkSum = (T*)MlasAlignAddress(QuantBBlkSum, MlasQNBitQuantBBlkSumAlignment());
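Note on the qnbitgemm.h change: the alignment of `PackedQuantBData` is now keyed on both the target and the bit width, because only the 8-bit path reserves workspace for the 32-byte round-up (see `QNBitGemmPackQuantBDataSize()`, per the comment). The underlying operation is the usual round-up-to-boundary computation; the sketch below illustrates what an align-address helper of this kind does (an assumption about behavior, not MLAS's actual `MlasAlignAddress` implementation):

```cpp
#include <cassert>
#include <cstdint>

// Round ptr up to the next multiple of alignment (a power of two). The code
// above relies on this when aligning PackedQuantBData to 64 bytes for AVX-512
// or 32 bytes for the 8-bit ARM64 kernels.
inline void* AlignAddress(void* ptr, std::uintptr_t alignment) {
  auto p = reinterpret_cast<std::uintptr_t>(ptr);
  return reinterpret_cast<void*>((p + alignment - 1) & ~(alignment - 1));
}

int main() {
  alignas(64) unsigned char buffer[128];
  void* unaligned = buffer + 3;
  void* aligned = AlignAddress(unaligned, 64);
  assert(reinterpret_cast<std::uintptr_t>(aligned) % 64 == 0);
  // Rounding is upward, so the result stays within an adequately padded buffer.
  assert(reinterpret_cast<std::uintptr_t>(aligned) >= reinterpret_cast<std::uintptr_t>(unaligned));
  return 0;
}
```

This is also why the allocation and alignment sides must agree: rounding up consumes up to `alignment - 1` extra bytes, which the packing-size computation has to reserve, the point the TODO in the hunk is about.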
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
index 9611cb82d5a62..6d8d5453b9fc0 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
@@ -927,7 +927,7 @@ namespace Dml
     bool IsGpuTensor(const onnxruntime::Tensor& tensor)
     {
-        return strcmp(tensor.Location().name, onnxruntime::CPU) &&
+        return strcmp(tensor.Location().name.c_str(), onnxruntime::CPU) &&
             !(tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput ||
               tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput);
     }
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
index c601ee3c1d5e6..fe52f27b35bb8 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
@@ -98,7 +98,7 @@ namespace Windows::AI::MachineLearning::Adapter
     bool IsAllocationInterface(const ::OrtMemoryInfo& info)
     {
-        return strcmp(info.name, onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput);
+        return strcmp(info.name.c_str(), onnxruntime::CPU) && !(info.mem_type == ::OrtMemType::OrtMemTypeCPUOutput || info.mem_type == ::OrtMemType::OrtMemTypeCPUInput);
     }

     // Translate the data object stored in a tensor to the type which will be returned through
@@ -1774,7 +1774,9 @@ namespace Windows::AI::MachineLearning::Adapter
         }

         // tells caller whether this tensor is in CPU memory
-        return !strcmp(m_impl->Location().name, onnxruntime::CPU) || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput;
+        return !strcmp(m_impl->Location().name.c_str(), onnxruntime::CPU)
+            || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput
+            || m_impl->Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput;
     }

     bool STDMETHODCALLTYPE TensorWrapper::IsDataInterface() const noexcept
diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
index 8641952f27ee5..a7e553848fb4d 100644
--- a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
+++ b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
@@ -219,9 +219,11 @@ struct QnnEpFactory : OrtEpFactory {
       OrtKeyValuePairs* ep_options = nullptr;
       factory->ort_api.CreateKeyValuePairs(&ep_options);
       factory->ort_api.AddKeyValuePair(ep_options, "backend_path", factory->qnn_backend_path.c_str());
-      ORT_API_RETURN_IF_ERROR(
-          factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
-                                                      &ep_devices[num_ep_devices++]));
+      OrtStatus* status = factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
+                                                                      &ep_devices[num_ep_devices++]);
+
+      factory->ort_api.ReleaseKeyValuePairs(ep_options);
+
+      ORT_API_RETURN_IF_ERROR(status);
     }
   }
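Note on the qnn_provider_factory.cc change: the previous `ORT_API_RETURN_IF_ERROR` wrapper returned before `ReleaseKeyValuePairs` could run, leaking `ep_options` whenever `CreateEpDevice` failed. The fix captures the status, releases unconditionally, and only then propagates. A generic sketch of the pattern (the `Handle`/`Create`/`Use`/`Release` names are hypothetical stand-ins, not ORT APIs):

```cpp
#include <memory>

// Hypothetical C-style resource API standing in for OrtKeyValuePairs.
struct Handle { int unused; };
struct Status { int code; };
Handle* Create() { return new Handle{0}; }
void Release(Handle* h) { delete h; }
Status* Use(Handle*) { return nullptr; }  // may return a failure status

// After the fix: capture the status, release unconditionally, then propagate.
// An early-return macro between Create() and Release() leaks on every failure.
Status* CreateAndUse() {
  Handle* h = Create();
  Status* status = Use(h);
  Release(h);  // runs on success and failure alike
  return status;
}

// Equivalent with RAII, which makes the guarantee structural rather than a
// matter of statement ordering.
Status* CreateAndUseRaii() {
  std::unique_ptr<Handle, void (*)(Handle*)> h(Create(), &Release);
  return Use(h.get());
}

int main() {
  return (CreateAndUse() == nullptr && CreateAndUseRaii() == nullptr) ? 0 : 1;
}
```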
diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc
index a9557f7b9aa87..562d54d1bf977 100644
--- a/onnxruntime/core/providers/webgpu/webgpu_context.cc
+++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc
@@ -210,7 +210,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) {
                       return tensor != nullptr &&
                              tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault &&
                              tensor->Location().device.Type() == OrtDevice::GPU &&
-                             !strcmp(tensor->Location().name, WEBGPU_BUFFER);
+                             !strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER);
                     }),
                     "All inputs must be tensors on WebGPU buffers.");

@@ -219,7 +219,7 @@ Status WebGpuContext::Run(ComputeContext& context, const ProgramBase& program) {
                       return tensor != nullptr &&
                              tensor->Location().mem_type == OrtMemType::OrtMemTypeDefault &&
                              tensor->Location().device.Type() == OrtDevice::GPU &&
-                             !strcmp(tensor->Location().name, WEBGPU_BUFFER);
+                             !strcmp(tensor->Location().name.c_str(), WEBGPU_BUFFER);
                     }),
                     "All outputs must be tensors on WebGPU buffers.");
 }
diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc
index 39b785c327d56..9c40eb75780ee 100644
--- a/onnxruntime/core/session/environment.cc
+++ b/onnxruntime/core/session/environment.cc
@@ -79,7 +79,7 @@ static bool AreOrtMemoryInfosEquivalent(
     bool ignore_alignment = false) {
   return left.mem_type == right.mem_type &&
          (ignore_alignment ? left.device.EqualIgnoringAlignment(right.device) : left.device == right.device) &&
-         (!match_name || strcmp(left.name, right.name) == 0);
+         (!match_name || left.name == right.name);
 }

 std::vector<AllocatorPtr>::const_iterator FindExistingAllocator(const std::vector<AllocatorPtr>& allocators,
diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc
index 85ea958981e2c..124d748029fd4 100644
--- a/onnxruntime/core/session/lora_adapters.cc
+++ b/onnxruntime/core/session/lora_adapters.cc
@@ -53,11 +53,11 @@ void LoraAdapter::MemoryMap(const std::filesystem::path& file_path) {
 static std::unique_ptr<IDataTransfer> GetDataTransfer(const OrtMemoryInfo& mem_info) {
   std::unique_ptr<IDataTransfer> data_transfer;

-  if (strcmp(mem_info.name, onnxruntime::CPU) == 0) {
+  if (mem_info.name == onnxruntime::CPU) {
     return data_transfer;
   }

-  if (strcmp(mem_info.name, onnxruntime::CUDA) == 0) {
+  if (mem_info.name == onnxruntime::CUDA) {
 #if defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE)
     auto* cuda_provider_info = TryGetProviderInfo_CUDA();
     if (cuda_provider_info != nullptr) {
diff --git a/onnxruntime/test/framework/TestAllocatorManager.cc b/onnxruntime/test/framework/TestAllocatorManager.cc
index 30f2686cd62f5..6440a805cdc59 100644
--- a/onnxruntime/test/framework/TestAllocatorManager.cc
+++ b/onnxruntime/test/framework/TestAllocatorManager.cc
@@ -10,7 +10,7 @@ namespace test {
 class DummyArena : public IAllocator {
  public:
   explicit DummyArena(std::unique_ptr<IAllocator> resource_allocator)
-      : IAllocator(OrtMemoryInfo(resource_allocator->Info().name,
+      : IAllocator(OrtMemoryInfo(resource_allocator->Info().name.c_str(),
                                  OrtAllocatorType::OrtDeviceAllocator,
                                  resource_allocator->Info().device,
                                  resource_allocator->Info().mem_type)),
diff --git a/onnxruntime/test/framework/allocator_test.cc b/onnxruntime/test/framework/allocator_test.cc
index 3efba6f1b6e52..445e023746aaa 100644
--- a/onnxruntime/test/framework/allocator_test.cc
+++ b/onnxruntime/test/framework/allocator_test.cc
@@ -13,7 +13,7 @@ namespace test {
 TEST(AllocatorTest, CPUAllocatorTest) {
   auto cpu_arena = TestCPUExecutionProvider()->CreatePreferredAllocators()[0];

-  ASSERT_STREQ(cpu_arena->Info().name, CPU);
+  ASSERT_STREQ(cpu_arena->Info().name.c_str(), CPU);
   EXPECT_EQ(cpu_arena->Info().device.Id(), 0);

   const auto expected_allocator_type = DoesCpuAllocatorSupportArenaUsage()
diff --git a/onnxruntime/test/framework/tensor_test.cc b/onnxruntime/test/framework/tensor_test.cc
index 2ac1a93013932..f08675271de21 100644
--- a/onnxruntime/test/framework/tensor_test.cc
+++ b/onnxruntime/test/framework/tensor_test.cc
@@ -29,7 +29,7 @@ void CPUTensorTest(std::vector<int64_t> dims, const int offset_elements = 0) {
   EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims());
   EXPECT_EQ(t.DataType(), DataTypeImpl::GetType<T>());
   auto& location = t.Location();
-  EXPECT_STREQ(location.name, CPU);
+  EXPECT_STREQ(location.name.c_str(), CPU);
   EXPECT_EQ(location.device.Id(), 0);

   const T* t_data = t.Data<T>();
@@ -47,7 +47,7 @@ void CPUTensorTest(std::vector<int64_t> dims, const int offset_elements = 0) {
   EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims());
   EXPECT_EQ(new_t.DataType(), DataTypeImpl::GetType<T>());
   auto& new_location = new_t.Location();
-  ASSERT_STREQ(new_location.name, CPU);
+  ASSERT_STREQ(new_location.name.c_str(), CPU);
   EXPECT_EQ(new_location.device.Id(), 0);
   }
 }
@@ -135,7 +135,7 @@ TEST(TensorTest, EmptyTensorTest) {
   EXPECT_TRUE(!data);

   auto& location = t.Location();
-  ASSERT_STREQ(location.name, CPU);
+  ASSERT_STREQ(location.name.c_str(), CPU);
   EXPECT_EQ(location.device.Id(), 0);

   const auto expected_allocator_type = DoesCpuAllocatorSupportArenaUsage()
@@ -160,7 +160,7 @@ TEST(TensorTest, StringTensorTest) {
   EXPECT_EQ(shape, tensor_shape);
   EXPECT_EQ(t.DataType(), DataTypeImpl::GetType<std::string>());
   auto& location = t.Location();
-  ASSERT_STREQ(location.name, CPU);
+  ASSERT_EQ(location.name, CPU);
   EXPECT_EQ(location.device.Id(), 0);

   std::string* new_data = t.MutableData<std::string>();
diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc
index e8291a36447ca..ecfaf34c8a076 100644
--- a/onnxruntime/test/lora/lora_test.cc
+++ b/onnxruntime/test/lora/lora_test.cc
@@ -216,7 +216,7 @@ TEST(LoraAdapterTest, VerifyDeviceCopy) {
   for (; begin != end; ++begin) {
     const auto& [_, param] = *begin;
     const auto& tensor_device = param.GetDeviceOrMapped().Get<Tensor>();
-    ASSERT_EQ(0, strcmp(tensor_device.Location().name, onnxruntime::CUDA));
+    ASSERT_EQ(0, strcmp(tensor_device.Location().name.c_str(), onnxruntime::CUDA));

     const auto& tensor_cpu = param.GetMapped().Get<Tensor>();
     ASSERT_EQ(tensor_cpu.Shape().Size(), tensor_device.Shape().Size());
diff --git a/onnxruntime/test/mlas/unittest/test_sq8bitgemm.cpp b/onnxruntime/test/mlas/unittest/test_sq8bitgemm.cpp
index 3ed283d54f41d..1be05d88849cd 100644
--- a/onnxruntime/test/mlas/unittest/test_sq8bitgemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_sq8bitgemm.cpp
@@ -773,7 +773,8 @@ class MlasSQ8BitGemmKernelTest : public MlasTestBase {
         N, K, 8, BlkLen, MLAS_QNBIT_GEMM_COMPUTE_TYPE::SQNBIT_CompInt8,
         nullptr, packedBuffer, nullptr, HasZp, inputZp, nullptr);

-    PackedQuantBDataStruct<float, 8> packedQuantB(packedBuffer, N, BlkCount, BlkLen, true);
+    const bool isQuantAUnsigned = GetMlasPlatform().ArmNeonIsQuantActivationsUnsigned;
+    PackedQuantBDataStruct<float, 8> packedQuantB(packedBuffer, N, BlkCount, BlkLen, isQuantAUnsigned);

     auto* C = C_.GetBuffer(M * ldc, true);
     auto* ref = ref_.GetBuffer(M * ldc, true);
@@ -825,7 +826,9 @@ class MlasSQ8BitGemmKernelTest : public MlasTestBase {
   void ExecuteShort(void) override {
     Execute<1, 16, 1, 16>();
+    Execute<1, 1, 1, 16>();
     Execute<7, 2, 4, 16>();
+    Execute<7, 128, 4, 16>();
     Execute<8, 497, 5, 16>();
     Execute<1, 3072, 128, 16>();
     Execute<2, 3072, 128, 16>();
diff --git a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc
index 91a4fe9a54251..af49bd0e3d58d 100644
--- a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc
+++ b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc
@@ -24,7 +24,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) {
   size_t size = 1024;

-  EXPECT_STREQ(cuda_arena->Info().name, CUDA);
+  EXPECT_STREQ(cuda_arena->Info().name.c_str(), CUDA);
   EXPECT_EQ(cuda_arena->Info().device.Id(), cuda_device_id);
   EXPECT_EQ(cuda_arena->Info().mem_type, OrtMemTypeDefault);
   EXPECT_EQ(cuda_arena->Info().alloc_type, OrtArenaAllocator);
@@ -38,7 +38,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) {
   auto pinned_allocator = CreateAllocator(pinned_memory_info);

-  EXPECT_STREQ(pinned_allocator->Info().name, CUDA_PINNED);
+  EXPECT_STREQ(pinned_allocator->Info().name.c_str(), CUDA_PINNED);
   EXPECT_EQ(pinned_allocator->Info().device.Id(), 0);
   EXPECT_EQ(pinned_allocator->Info().mem_type, OrtMemTypeCPUOutput);
   EXPECT_EQ(pinned_allocator->Info().alloc_type, OrtArenaAllocator);
@@ -50,7 +50,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) {
   AllocatorCreationInfo cpu_memory_info(
       [](int) { return std::make_unique<CPUAllocator>(); },
       true);
   const auto& cpu_arena = CreateAllocator(cpu_memory_info);
-  EXPECT_STREQ(cpu_arena->Info().name, CPU);
+  EXPECT_STREQ(cpu_arena->Info().name.c_str(), CPU);
   EXPECT_EQ(cpu_arena->Info().device.Id(), 0);
   EXPECT_EQ(cpu_arena->Info().mem_type, OrtMemTypeDefault);
   EXPECT_EQ(cpu_arena->Info().alloc_type, OrtArenaAllocator);
diff --git a/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml
index f22a26cec6d88..3acf0788ab5c3 100644
--- a/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml
@@ -13,6 +13,21 @@ parameters:
   type: boolean
   default: false

+- name: PreReleaseVersionSuffixString
+  displayName: Suffix added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the type of pre-release package.
+  type: string
+  values:
+  - alpha
+  - beta
+  - rc
+  - none
+  default: none
+
+- name: PreReleaseVersionSuffixNumber
+  displayName: Number added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the sequence of a pre-release package.
+  type: number
+  default: 0
+
 - name: PackageName
   displayName: What is the package name? Override using an environment variable CustomPackageName.
   type: string
@@ -69,6 +84,12 @@ extends:
       exclusionsFile: '$(Build.SourcesDirectory)\tools\ci_build\policheck_exclusions.xml'
     stages:
+    - template: stages/set_packaging_variables_stage.yml
+      parameters:
+        IsReleaseBuild: ${{ parameters.IsReleaseBuild }}
+        PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }}
+        PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }}
+
     - template: templates/win-ci.yml
       parameters:
        ort_build_pool_name: 'onnxruntime-Win2022-GPU-A10'
diff --git a/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml
index a94ceea6354e5..02fae0b10ac39 100644
--- a/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/qnn-ep-nuget-packaging-pipeline.yml
@@ -14,6 +14,22 @@ parameters:
   type: boolean
   default: false

+
+- name: PreReleaseVersionSuffixString
+  displayName: Suffix added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the type of pre-release package.
+  type: string
+  values:
+  - alpha
+  - beta
+  - rc
+  - none
+  default: none
+
+- name: PreReleaseVersionSuffixNumber
+  displayName: Number added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the sequence of a pre-release package.
+  type: number
+  default: 0
+
 - name: DoEsrp
   displayName: Run code sign tasks? Must be true if you are doing an Onnx Runtime release.
   type: boolean
@@ -68,6 +84,11 @@ extends:
         enabled: true
         exclusionsFile: '$(Build.SourcesDirectory)\tools\ci_build\policheck_exclusions.xml'
     stages:
+    - template: stages/set_packaging_variables_stage.yml
+      parameters:
+        IsReleaseBuild: ${{ parameters.IsReleaseBuild }}
+        PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }}
+        PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }}

     - template: templates/qnn-ep-win.yml
       parameters:
diff --git a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml
index eeb8709e0dea2..b6388c22fae98 100644
--- a/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/qnn-ep-win.yml
@@ -11,7 +11,8 @@ parameters:
 stages:
 - stage: ${{ parameters.StageName }}
-  dependsOn: []
+  dependsOn:
+  - Setup
   jobs:
   - job: ${{ parameters.StageName }}
     timeoutInMinutes: 300
@@ -45,6 +46,7 @@ stages:
         artifactName: "drop-signed-nuget-qnn"
     variables:
       OrtPackageId: ${{ parameters.OrtNugetPackageId }}
+      ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']]
      commonBuildArgs: '--skip_submodule_sync --build_shared_lib --client_package_build --cmake_generator "Visual Studio 17 2022" --config ${{ parameters.build_config }} --parallel --use_vcpkg --use_vcpkg_ms_internal_asset_cache --use_binskim_compliant_compile_flags '

     steps:
@@ -107,7 +109,12 @@ stages:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.DesktopOnly.CSharp.sln'
         platform: 'Any CPU'
         configuration: ${{ parameters.build_config }}
-        msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}'
+        msbuildArguments: >
+          -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)"
+          -p:OrtPackageId=$(OrtPackageId)
+          -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}
+          -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)
+          -p:PackageVersion=$(OnnxRuntimeVersion)
         workingDirectory: '$(Build.SourcesDirectory)\csharp'

     - ${{ if eq(parameters.DoEsrp, true) }}:
@@ -123,7 +130,7 @@ stages:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj'
         platform: 'Any CPU'
         configuration: ${{ parameters.build_config }}
-        msbuildArguments: '-t:CreatePackage -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:TargetArchitecture=arm64'
+        msbuildArguments: '-t:CreatePackage -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) -p:TargetArchitecture=arm64'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'

     - task: CopyFiles@2
diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt
index b52d0cbcf3fea..a52e57138117a 100644
--- a/tools/ci_build/github/linux/docker/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt
@@ -3,7 +3,7 @@ numpy==2.2.6
 mypy
 pytest
 setuptools==78.1.1
-wheel==0.42.0
+wheel==0.45.1
 onnx==1.18.0
 argparse
 sympy==1.14
diff --git a/tools/python/run_packaging_pipelines.py b/tools/python/run_packaging_pipelines.py
index 2ae66332781b9..4948f35c642e8 100644
--- a/tools/python/run_packaging_pipelines.py
+++ b/tools/python/run_packaging_pipelines.py
@@ -277,7 +277,9 @@ def filter_pipelines(pipelines: list[dict], token: str, branch: str, is_pr_build
         if result:
             filtered_results.append(result)

-    print(f"\nFound {len(filtered_results)} pipelines to trigger.")
+    print(f"\nFound {len(filtered_results)} pipelines to trigger:")
+    for result in filtered_results:
+        print(f"  - {result['pipeline']['name']}")

     return filtered_results

@@ -453,6 +455,14 @@ def main():
         nightly_override = "0"
         release_override = "true"

+    # If pre-release flags are used, it implies a release build.
+    if args.pre_release_suffix_string:
+        print("Pre-release suffix provided. Forcing 'release' build mode.")
+        if args.build_mode and args.build_mode != "release":
+            print(f"Warning: --build-mode={args.build_mode} is overridden by pre-release flags.")
+        nightly_override = "0"
+        release_override = "true"
+
     for result in pipelines_to_trigger:
         pipeline = result["pipeline"]
         packaging_type = result["packaging_type"]