diff --git a/onnxruntime/core/framework/endian_utils.cc b/onnxruntime/core/framework/endian_utils.cc index 640ba8df29442..e9167f7dc7d24 100644 --- a/onnxruntime/core/framework/endian_utils.cc +++ b/onnxruntime/core/framework/endian_utils.cc @@ -26,9 +26,9 @@ OutputIt ReverseCopy(BidirIt first, BidirIt last, OutputIt d_first) { } // namespace -void SwapByteOrderCopy( - size_t element_size_in_bytes, - gsl::span source_bytes, gsl::span destination_bytes) { +void SwapByteOrderCopy(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes) { assert(element_size_in_bytes > 0); assert(source_bytes.size_bytes() % element_size_in_bytes == 0); assert(source_bytes.size_bytes() == destination_bytes.size_bytes()); @@ -40,28 +40,38 @@ void SwapByteOrderCopy( for (size_t element_offset = 0, element_offset_end = source_bytes.size_bytes(); element_offset < element_offset_end; element_offset += element_size_in_bytes) { - const auto source_element_bytes = - source_bytes.subspan(element_offset, element_size_in_bytes); - const auto dest_element_bytes = - destination_bytes.subspan(element_offset, element_size_in_bytes); - ReverseCopy( - source_element_bytes.data(), - source_element_bytes.data() + source_element_bytes.size_bytes(), - dest_element_bytes.data()); + const auto source_element_bytes = source_bytes.subspan(element_offset, element_size_in_bytes); + const auto dest_element_bytes = destination_bytes.subspan(element_offset, element_size_in_bytes); + ReverseCopy(source_element_bytes.data(), + source_element_bytes.data() + source_element_bytes.size_bytes(), + dest_element_bytes.data()); } } namespace detail { -void CopyLittleEndian(size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes) { +Status CopyLittleEndian(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes) { + ORT_RETURN_IF(source_bytes.size_bytes() != destination_bytes.size_bytes(), + "source and destination buffer size mismatch"); + 
if (endian::native == endian::little) { std::memcpy(destination_bytes.data(), source_bytes.data(), source_bytes.size_bytes()); } else { SwapByteOrderCopy(element_size_in_bytes, source_bytes, destination_bytes); } + + return Status::OK(); } } // namespace detail +common::Status ReadLittleEndian(size_t element_size, + gsl::span source_bytes, + gsl::span destination_bytes) { + return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes); +} + } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/framework/endian_utils.h b/onnxruntime/core/framework/endian_utils.h index 7da16e670981e..92460679a9d5b 100644 --- a/onnxruntime/core/framework/endian_utils.h +++ b/onnxruntime/core/framework/endian_utils.h @@ -27,8 +27,9 @@ namespace utils { * @param source_bytes The source byte span. * @param destination_bytes The destination byte span. */ -void SwapByteOrderCopy( - size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes); +void SwapByteOrderCopy(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes); namespace detail { @@ -36,43 +37,45 @@ namespace detail { * Copies between two buffers where one is little-endian and the other has * native endian-ness. */ -void CopyLittleEndian( - size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes); +Status CopyLittleEndian(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes); } // namespace detail /** * Reads from a little-endian source. */ +common::Status ReadLittleEndian(size_t element_size, + gsl::span source_bytes, + gsl::span destination_bytes); + +/** + * Reads from a little-endian source with check that T is trivially copyable. 
+ * @remarks Check is skipped if building with gcc v4 + */ template -common::Status ReadLittleEndian(gsl::span source_bytes, gsl::span destination) { +common::Status ReadLittleEndian(gsl::span source_bytes, gsl::span destination) { // std::is_trivially_copyable is not implemented in older versions of GCC #if !defined(__GNUC__) || __GNUC__ >= 5 static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); #endif - ORT_RETURN_IF_NOT(source_bytes.size_bytes() == destination.size_bytes(), - "source and destination buffer size mismatch"); - const auto destination_bytes = gsl::make_span( - reinterpret_cast(destination.data()), destination.size_bytes()); - detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); - return common::Status::OK(); + const auto destination_bytes = gsl::make_span(reinterpret_cast(destination.data()), + destination.size_bytes()); + return ReadLittleEndian(sizeof(T), source_bytes, destination_bytes); } /** * Writes to a little-endian destination. 
*/ template -common::Status WriteLittleEndian(gsl::span source, gsl::span destination_bytes) { +common::Status WriteLittleEndian(gsl::span source, gsl::span destination_bytes) { // std::is_trivially_copyable is not implemented in older versions of GCC #if !defined(__GNUC__) || __GNUC__ >= 5 static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); #endif - ORT_RETURN_IF_NOT(source.size_bytes() == destination_bytes.size_bytes(), - "source and destination buffer size mismatch"); - const auto source_bytes = gsl::make_span( - reinterpret_cast(source.data()), source.size_bytes()); - detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); - return common::Status::OK(); + const auto source_bytes = gsl::make_span(reinterpret_cast(source.data()), source.size_bytes()); + return detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); } } // namespace utils diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index e9a39e5f74579..0e6f63cae91e6 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -97,29 +97,44 @@ std::vector GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorP } // This function doesn't support string tensors -template -static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size, - /*out*/ T* p_data) { +static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len, + size_t expected_num_elements, size_t element_size, + /*out*/ unsigned char* p_data) { + auto src = gsl::make_span(static_cast(raw_data), raw_data_len); + auto dst = gsl::make_span(p_data, expected_num_elements * element_size); + size_t expected_size_in_bytes; - if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) { + if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_num_elements, element_size, 
&expected_size_in_bytes)) { return Status(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::INVALID_ARGUMENT, "size overflow"); } - if (raw_data_length != expected_size_in_bytes) + + if (dst.size_bytes() != expected_size_in_bytes) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "UnpackTensor: the pre-allocated size does not match the raw data size, expected ", - expected_size_in_bytes, ", got ", raw_data_length); + expected_size_in_bytes, ", got ", dst.size_bytes()); + } - const char* const raw_data_bytes = reinterpret_cast(raw_data); - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian( - gsl::make_span(raw_data_bytes, raw_data_length), gsl::make_span(p_data, expected_size))); - return Status::OK(); + // ReadLittleEndian checks src and dst buffers are the same size + return onnxruntime::utils::ReadLittleEndian(element_size, src, dst); +} + +template +Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_len, size_t expected_num_elements, + /*out*/ T* p_data) { + // std::is_trivially_copyable is not implemented in older versions of GCC +#if !defined(__GNUC__) || __GNUC__ >= 5 + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); +#endif + + return UnpackTensorWithRawDataImpl(raw_data, raw_data_len, expected_num_elements, sizeof(T), + reinterpret_cast(p_data)); } static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, std::basic_string& external_file_path, onnxruntime::FileOffsetType& file_offset, - SafeInt& tensor_data_length) { + SafeInt& tensor_byte_size) { ORT_RETURN_IF_NOT(onnxruntime::utils::HasExternalData(tensor_proto), "Tensor does not have external data to read from."); @@ -137,16 +152,12 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot file_offset = external_data_info->GetOffset(); - ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>( - tensor_proto, &tensor_data_length)); + 
ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size)); const size_t external_data_length = external_data_info->GetLength(); - ORT_RETURN_IF_NOT( - external_data_length == 0 || - external_data_length == tensor_data_length, - "TensorProto external data size mismatch. ", - "Computed size: ", *&tensor_data_length, - ", external_data.length: ", external_data_length); + ORT_RETURN_IF_NOT(external_data_length == 0 || external_data_length == tensor_byte_size, + "TensorProto external data size mismatch. Computed size: ", *&tensor_byte_size, + ", external_data.length: ", external_data_length); return Status::OK(); } @@ -157,8 +168,8 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // This function does not unpack string_data of an initializer tensor static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, - std::unique_ptr& unpacked_tensor, - SafeInt& tensor_data_length) { + std::unique_ptr& unpacked_tensor, + SafeInt& tensor_byte_size) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; ORT_RETURN_IF_ERROR(GetExternalDataInfo( @@ -166,14 +177,14 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso tensor_proto_dir, external_file_path, file_offset, - tensor_data_length)); + tensor_byte_size)); - unpacked_tensor.reset(new uint8_t[*&tensor_data_length]); + unpacked_tensor.reset(new unsigned char[*&tensor_byte_size]); ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer( external_file_path.c_str(), file_offset, - tensor_data_length, - gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_data_length))); + tensor_byte_size, + gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_byte_size))); return Status::OK(); } @@ -182,91 +193,108 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso namespace 
onnxruntime { namespace utils { #if !defined(ORT_MINIMAL_BUILD) -#define DEFINE_UNPACK_EXTERNAL_TENSOR(T) \ - template <> \ - Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ - const ORTCHAR_T* tensor_proto_dir, size_t expected_size, \ - /*out*/ T* p_data) { \ - ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ - \ - std::unique_ptr unpacked_tensor; \ - SafeInt tensor_byte_size = 0; \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( \ - tensor, \ - tensor_proto_dir, \ - unpacked_tensor, \ - tensor_byte_size)); \ - \ - size_t element_count = tensor_byte_size / sizeof(T); \ - ORT_RETURN_IF_NOT(expected_size == element_count, "Expected data size does not match the actual external data size."); \ - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian( \ - gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_byte_size), \ - gsl::make_span(p_data, expected_size))); \ - \ - return Status::OK(); \ - } +static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor, + const ORTCHAR_T* tensor_proto_dir, + size_t expected_num_elements, size_t element_size, + /*out*/ unsigned char* p_data) { + ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); + + std::unique_ptr unpacked_tensor; + SafeInt tensor_byte_size = 0; + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor, tensor_byte_size)); + + // ReadLittleEndian checks src and dst buffers are the same size + auto src_span = gsl::make_span(unpacked_tensor.get(), tensor_byte_size); + auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size); + + return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span); +} + +template +Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, + const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements, + /*out*/ T* p_data) { + // std::is_trivially_copyable is not implemented in older versions of GCC +#if 
!defined(__GNUC__) || __GNUC__ >= 5 + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); +#endif + + return UnpackTensorWithExternalDataImpl(tensor, tensor_proto_dir, expected_num_elements, sizeof(T), + reinterpret_cast(p_data)); +} -DEFINE_UNPACK_EXTERNAL_TENSOR(float) -DEFINE_UNPACK_EXTERNAL_TENSOR(double) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint8_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int8_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int16_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint16_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int32_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int64_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint64_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint32_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(bool) -DEFINE_UNPACK_EXTERNAL_TENSOR(MLFloat16) -DEFINE_UNPACK_EXTERNAL_TENSOR(BFloat16) +#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \ + template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const ORTCHAR_T*, size_t, type*); + +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint8_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int8_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int16_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint16_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int32_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int64_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint64_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint32_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(bool) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(MLFloat16) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(BFloat16) template <> Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& /*tensor*/, - const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_size*/, + const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_num_elements*/, /*out*/ std::string* /*p_data*/) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "External data type cannot be STRING."); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data type cannot be STRING."); } #endif 
//!defined(ORT_MINIMAL_BUILD) -// This macro doesn't work for Float16/bool/string tensors -#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size) \ - template <> \ - Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \ - /*out*/ T* p_data, size_t expected_size) { \ - if (nullptr == p_data) { \ - const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size(); \ - if (size == 0) return Status::OK(); \ - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ - } \ - if (nullptr == p_data || Type != tensor.data_type()) { \ - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ - } \ - if (raw_data != nullptr) { \ - return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); \ - } \ - if (static_cast(tensor.field_size()) != expected_size) \ - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "corrupted protobuf data: tensor shape size(", \ - expected_size, ") does not match the data size(", tensor.field_size(), ") in proto"); \ - auto& data = tensor.field_name(); \ - for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ - *p_data++ = *reinterpret_cast(data_iter); \ - return Status::OK(); \ +// implementation of type specific unpack of data contained within the TensorProto +template +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ T* p_data, size_t expected_num_elements); + +#define DEFINE_UNPACK_TENSOR_IMPL(T, Type, field_name, field_size) \ + template <> \ + Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \ + /*out*/ T* p_data, size_t expected_num_elements) { \ + if (nullptr == p_data) { \ + const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.field_size(); \ + if (size == 0) return Status::OK(); \ + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ + } \ + if (nullptr == p_data || Type != tensor.data_type()) { \ + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ + } \ + if (raw_data != nullptr) { \ + return UnpackTensorWithRawData(raw_data, raw_data_len, expected_num_elements, p_data); \ + } \ + if (static_cast(tensor.field_size()) != expected_num_elements) \ + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, \ + "corrupted protobuf data: tensor shape size(", expected_num_elements, \ + ") does not match the data size(", tensor.field_size(), ") in proto"); \ + auto& data = tensor.field_name(); \ + for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ + *p_data++ = *reinterpret_cast(data_iter); \ + return Status::OK(); \ } // TODO: complex64 complex128 -DEFINE_UNPACK_TENSOR(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size) -DEFINE_UNPACK_TENSOR(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size); -DEFINE_UNPACK_TENSOR(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size) -DEFINE_UNPACK_TENSOR(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) -DEFINE_UNPACK_TENSOR(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) - -// doesn't support raw data 
+DEFINE_UNPACK_TENSOR_IMPL(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size) +DEFINE_UNPACK_TENSOR_IMPL(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size); +DEFINE_UNPACK_TENSOR_IMPL(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) + +// +// Specializations of UnpackTensor that need custom handling for the input type +// + +// UnpackTensor. 
Note: doesn't support raw data template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, /*out*/ std::string* p_data, size_t expected_size) { @@ -289,6 +317,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw return Status::OK(); } + +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, size_t expected_size) { @@ -314,6 +344,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } + +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ MLFloat16* p_data, size_t expected_size) { @@ -346,6 +378,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ BFloat16* p_data, size_t expected_size) { @@ -380,6 +413,49 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } +// UnpackTensor from raw data, external data or the type specific data field. +// Uses the model path to construct the full path for loading external data. In case when model_path is empty +// it uses current directory. +template +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + /*out*/ T* p_data, size_t expected_num_elements) { +#if !defined(ORT_MINIMAL_BUILD) + if (HasExternalData(tensor)) { + return UnpackTensorWithExternalData( + tensor, + model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), + expected_num_elements, + p_data); + } +#else + ORT_UNUSED_PARAMETER(model_path); + ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build."); +#endif + + return HasRawData(tensor) + ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_num_elements) + : UnpackTensor(tensor, nullptr, 0, p_data, expected_num_elements); +} + +// instantiate the UnpackTensor variant that supports external data +#define INSTANTIATE_UNPACK_TENSOR(type) \ + template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const Path&, type* p_data, size_t); + +INSTANTIATE_UNPACK_TENSOR(float) +INSTANTIATE_UNPACK_TENSOR(double) +INSTANTIATE_UNPACK_TENSOR(uint8_t) +INSTANTIATE_UNPACK_TENSOR(int8_t) +INSTANTIATE_UNPACK_TENSOR(int16_t) +INSTANTIATE_UNPACK_TENSOR(uint16_t) +INSTANTIATE_UNPACK_TENSOR(int32_t) +INSTANTIATE_UNPACK_TENSOR(int64_t) +INSTANTIATE_UNPACK_TENSOR(uint64_t) +INSTANTIATE_UNPACK_TENSOR(uint32_t) +INSTANTIATE_UNPACK_TENSOR(bool) +INSTANTIATE_UNPACK_TENSOR(MLFloat16) +INSTANTIATE_UNPACK_TENSOR(BFloat16) +INSTANTIATE_UNPACK_TENSOR(std::string) + #define CASE_PROTO_TRACE(X, Y) \ case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ @@ -472,12 +548,6 @@ static void UnInitTensor(void* param) noexcept { delete p; } -#define CASE_PROTO(X, Y) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ - ORT_RETURN_IF_ERROR( \ - UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, static_cast(tensor_size))); \ - break; - class AutoDelete { public: OrtCallback d{nullptr, nullptr}; @@ -530,6 +600,14 @@ static void MoveOrtCallback(OrtCallback& from, OrtCallback& to) { from.f = nullptr; from.param = nullptr; } + +#define CASE_PROTO(X, Y) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ + 
ORT_RETURN_IF_ERROR( \ + UnpackTensor(tensor_proto, raw_data, raw_data_len, \ + (Y*)preallocated, static_cast(tensor_size))); \ + break; + #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 6239) @@ -632,7 +710,8 @@ Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* model_path, deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type}; } ORT_RETURN_IF_ERROR(UnpackTensor(tensor_proto, raw_data, raw_data_len, - (std::string*)preallocated, static_cast(tensor_size))); + static_cast(preallocated), + static_cast(tensor_size))); break; default: { std::ostringstream ostr; @@ -1034,41 +1113,42 @@ template common::Status GetSizeInBytesFromTensorProto(const ONN size_t* out); template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); -#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ - if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( \ - initializer, \ - model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(), \ - unpacked_tensor, \ - tensor_byte_size)); \ - tensor_data_length = tensor_byte_size; \ - return Status::OK(); \ - } else { \ - size_t element_count = 0; \ - if (initializer.has_raw_data()) { \ - tensor_byte_size = initializer.raw_data().size(); \ - element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ - } else { \ - element_count = initializer.DATA_SIZE(); \ - tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ - } \ - tensor_data_length = tensor_byte_size; \ - unpacked_tensor.reset(new uint8_t[tensor_data_length]); \ - return onnxruntime::utils::UnpackTensor( \ - initializer, \ - initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ - initializer.has_raw_data() ? 
initializer.raw_data().size() : 0, \ - reinterpret_cast(unpacked_tensor.get()), element_count); \ - } \ - break; \ +#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + size_t element_count = 0; \ + if (initializer.has_raw_data()) { \ + tensor_byte_size = initializer.raw_data().size(); \ + element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ + } else { \ + element_count = initializer.DATA_SIZE(); \ + tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ + } \ + tensor_byte_size_out = tensor_byte_size; \ + unpacked_tensor.reset(new unsigned char[tensor_byte_size_out]); \ + return onnxruntime::utils::UnpackTensor( \ + initializer, \ + initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ + initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ + reinterpret_cast(unpacked_tensor.get()), element_count); \ + break; \ } Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, - size_t& tensor_data_length) { - SafeInt tensor_byte_size = tensor_data_length; + std::unique_ptr& unpacked_tensor, + size_t& tensor_byte_size_out) { + SafeInt tensor_byte_size; + + if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( + initializer, + model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), + unpacked_tensor, + tensor_byte_size)); + tensor_byte_size_out = tensor_byte_size; + return Status::OK(); + } + switch (initializer.data_type()) { CASE_UNPACK(FLOAT, float, float_data_size); CASE_UNPACK(DOUBLE, double, double_data_size); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index da00e8eab5bb7..1f3f0c844aafb 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -238,16 +238,6 @@ inline bool HasName(const ONNX_NAMESPACE::NodeProto& node_proto) { return node_proto.has_name(); } -#if !defined(ORT_MINIMAL_BUILD) -// Unpack tensor which contains external data. Uses the tensor_proto_dir to construct the full path for external data. -// If tensor_proto_dir == nullptr then uses the current directory instead. -// This function does not unpack string_data of a tensor -template -Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, - const ORTCHAR_T* tensor_proto_dir, size_t expected_size, - /*out*/ T* p_data); -#endif // !defined(ORT_MINIMAL_BUILD) - // UnpackTensor from raw data or the type specific data field. Does not handle external data. // If the tensor does not contain raw data then raw_data should be nullptr and raw_data_len should be 0. template @@ -258,37 +248,21 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d // Uses the model path to construct the full path for loading external data. In case when model_path is empty // it uses current directory. template -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, /*out*/ T* p_data, size_t expected_size) { -#if !defined(ORT_MINIMAL_BUILD) - if (HasExternalData(tensor)) { - return UnpackTensorWithExternalData( - tensor, - model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), - expected_size, - p_data); - } -#else - ORT_UNUSED_PARAMETER(model_path); - ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build."); -#endif - - return HasRawData(tensor) - ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_size) - : UnpackTensor(tensor, nullptr, 0, p_data, expected_size); -} +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + /*out*/ T* p_data, size_t expected_size); /** * Unpack the data from an initializer tensor * Please note, this function does not unpack string_data of an initializer tensor * @param initializer given initializer tensor * @param initializer_dir model_path to construct external data dir path. When this is empty, current dir is used. - * @param unpacked_tensor the data from the initaizlier in uint8_t* form + * @param unpacked_tensor the data from the initializer in byte form * @param tensor_byte_size the byte size of the unpacked_tensor * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, + std::unique_ptr& unpacked_tensor, size_t& tensor_byte_size) ORT_MUST_USE_RESULT; } // namespace utils diff --git a/onnxruntime/test/framework/endian_test.cc b/onnxruntime/test/framework/endian_test.cc index fce9741925862..938adf2cd3db5 100644 --- a/onnxruntime/test/framework/endian_test.cc +++ b/onnxruntime/test/framework/endian_test.cc @@ -14,7 +14,7 @@ namespace test { TEST(EndianTest, EndiannessDetection) { const uint16_t test_value = 0x1234; - const char* test_value_first_byte = reinterpret_cast(&test_value); + const unsigned char* test_value_first_byte = reinterpret_cast(&test_value); if (endian::native == endian::little) { EXPECT_EQ(*test_value_first_byte, 0x34); } else if (endian::native == 
endian::big) { @@ -23,13 +23,13 @@ TEST(EndianTest, EndiannessDetection) { } TEST(EndianTest, SwapByteOrderCopy) { - const auto src = std::vector{ + const auto src = std::vector{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'}; - auto result = std::vector(src.size()); + auto result = std::vector(src.size()); { SwapByteOrderCopy(3, gsl::make_span(src), gsl::make_span(result)); - const auto expected = std::vector{ + const auto expected = std::vector{ 'c', 'b', 'a', 'f', 'e', 'd', 'i', 'h', 'g', @@ -39,7 +39,7 @@ TEST(EndianTest, SwapByteOrderCopy) { { SwapByteOrderCopy(4, gsl::make_span(src), gsl::make_span(result)); - const auto expected = std::vector{ + const auto expected = std::vector{ 'd', 'c', 'b', 'a', 'h', 'g', 'f', 'e', 'l', 'k', 'j', 'i'}; diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index a048df52fcc26..39f0f6d6f1b2a 100644 --- a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -76,8 +76,9 @@ TEST(TensorProtoUtilsTest, UnpackTensor) { EXPECT_FALSE(status.IsOK()); } +namespace { template -static std::vector CreateValues() { +std::vector CreateValues() { return {1, 2, 3, 4}; } @@ -86,17 +87,49 @@ std::vector CreateValues() { return {"one", "two", "three", "four"}; } +template <> +std::vector CreateValues() { + return {true, false, false, true}; +} + +template <> +std::vector CreateValues() { + return {MLFloat16(0.f), MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f)}; +} + +template <> +std::vector CreateValues() { + return {BFloat16(0.f), BFloat16(1.f), BFloat16(2.f), BFloat16(3.f)}; +} + +template +void WriteDataToFile(FILE* fp, const std::vector& test_data) { + size_t size_in_bytes = test_data.size() * sizeof(T); + ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp)); +} + +std::unique_ptr BoolDataFromVector(const std::vector& test_data) { + auto arr = onnxruntime::make_unique(test_data.size()); + 
std::copy(std::begin(test_data), std::end(test_data), arr.get()); + return arr; +} + +// work around std::vector storing data in bits +template <> +void WriteDataToFile(FILE* fp, const std::vector& test_data) { + auto arr = BoolDataFromVector(test_data); + size_t size_in_bytes = test_data.size() * sizeof(bool); + ASSERT_EQ(size_in_bytes, fwrite(arr.get(), 1, size_in_bytes, fp)); +} + template -static void CreateTensorWithExternalData( - TensorProto_DataType type, - const std::vector& test_data, - std::basic_string& filename, - TensorProto& tensor_proto) { +void CreateTensorWithExternalData(TensorProto_DataType type, const std::vector& test_data, + std::basic_string& filename, + TensorProto& tensor_proto) { // Create external data FILE* fp; CreateTestFile(fp, filename); - size_t size_in_bytes = test_data.size() * sizeof(T); - ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp)); + WriteDataToFile(fp, test_data); ASSERT_EQ(0, fclose(fp)); // set the tensor_proto to reference this external data @@ -109,15 +142,7 @@ static void CreateTensorWithExternalData( } template -static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) { - // Create external data - std::basic_string filename(ORT_TSTR("tensor_XXXXXX")); - TensorProto tensor_proto; - auto test_data = CreateValues(); - CreateTensorWithExternalData(type, test_data, filename, tensor_proto); - std::unique_ptr file_deleter(const_cast(filename.c_str()), - DeleteFileFromDisk); - +void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, const std::vector& test_data) { // Unpack tensor with external data std::vector val(test_data.size()); auto st = utils::UnpackTensor(tensor_proto, model_path, val.data(), test_data.size()); @@ -125,15 +150,45 @@ static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& mode // Validate data for (size_t i = 0; i < test_data.size(); i++) { - ASSERT_EQ(val[i], test_data[i]); + ASSERT_TRUE(val[i] == 
test_data[i]); // need to use ASSERT_TRUE with '==' to handle MFLoat16 and BFloat16 + } +} + +template <> +void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, + const std::vector& test_data) { + // Unpack tensor with external data + auto arr = onnxruntime::make_unique(test_data.size()); + auto st = utils::UnpackTensor(tensor_proto, model_path, arr.get(), test_data.size()); + ASSERT_TRUE(st.IsOK()) << st.ErrorMessage(); + + // Validate data + for (size_t i = 0; i < test_data.size(); i++) { + ASSERT_TRUE(arr[i] == test_data[i]); } } +template +void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) { + // Create external data + std::basic_string filename(ORT_TSTR("tensor_XXXXXX")); + TensorProto tensor_proto; + auto test_data = CreateValues(); + CreateTensorWithExternalData(type, test_data, filename, tensor_proto); + std::unique_ptr file_deleter(const_cast(filename.c_str()), + DeleteFileFromDisk); + UnpackAndValidate(tensor_proto, model_path, test_data); +} +} // namespace TEST(TensorProtoUtilsTest, UnpackTensorWithExternalData) { Path model_path; TestUnpackExternalTensor(TensorProto_DataType_FLOAT, model_path); TestUnpackExternalTensor(TensorProto_DataType_DOUBLE, model_path); TestUnpackExternalTensor(TensorProto_DataType_INT32, model_path); + TestUnpackExternalTensor(TensorProto_DataType_INT8, model_path); + TestUnpackExternalTensor(TensorProto_DataType_FLOAT16, model_path); + TestUnpackExternalTensor(TensorProto_DataType_BFLOAT16, model_path); + TestUnpackExternalTensor(TensorProto_DataType_BOOL, model_path); } template diff --git a/onnxruntime/test/optimizer/initializer_test.cc b/onnxruntime/test/optimizer/initializer_test.cc index 3e7c4eb9de46f..eaaad9ad0997c 100644 --- a/onnxruntime/test/optimizer/initializer_test.cc +++ b/onnxruntime/test/optimizer/initializer_test.cc @@ -21,18 +21,17 @@ namespace test { namespace { template Status WriteExternalDataFile(gsl::span data, const PathString& path, 
ScopedFileDeleter& file_deleter) { - std::vector data_bytes(data.size_bytes()); + std::vector data_bytes(data.size_bytes()); ORT_RETURN_IF_ERROR(onnxruntime::utils::WriteLittleEndian(data, gsl::make_span(data_bytes))); std::ofstream out{path, std::ios::binary | std::ios::trunc}; - ORT_RETURN_IF_NOT(out && out.write(data_bytes.data(), data_bytes.size()), + ORT_RETURN_IF_NOT(out && out.write(reinterpret_cast(data_bytes.data()), data_bytes.size()), "out && out.write(data_bytes.data(), data_bytes.size()) was false"); file_deleter = ScopedFileDeleter{path}; return Status::OK(); } -void SetTensorProtoExternalData( - const std::string& key, const std::string& value, - ONNX_NAMESPACE::TensorProto& tensor_proto) { +void SetTensorProtoExternalData(const std::string& key, const std::string& value, + ONNX_NAMESPACE::TensorProto& tensor_proto) { auto* external_data = tensor_proto.mutable_external_data(); auto kvp_it = std::find_if( external_data->begin(), external_data->end(),