diff --git a/onnxruntime/core/framework/endian_utils.cc b/onnxruntime/core/framework/endian_utils.cc index 640ba8df29442..e9167f7dc7d24 100644 --- a/onnxruntime/core/framework/endian_utils.cc +++ b/onnxruntime/core/framework/endian_utils.cc @@ -26,9 +26,9 @@ OutputIt ReverseCopy(BidirIt first, BidirIt last, OutputIt d_first) { } // namespace -void SwapByteOrderCopy( - size_t element_size_in_bytes, - gsl::span source_bytes, gsl::span destination_bytes) { +void SwapByteOrderCopy(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes) { assert(element_size_in_bytes > 0); assert(source_bytes.size_bytes() % element_size_in_bytes == 0); assert(source_bytes.size_bytes() == destination_bytes.size_bytes()); @@ -40,28 +40,38 @@ void SwapByteOrderCopy( for (size_t element_offset = 0, element_offset_end = source_bytes.size_bytes(); element_offset < element_offset_end; element_offset += element_size_in_bytes) { - const auto source_element_bytes = - source_bytes.subspan(element_offset, element_size_in_bytes); - const auto dest_element_bytes = - destination_bytes.subspan(element_offset, element_size_in_bytes); - ReverseCopy( - source_element_bytes.data(), - source_element_bytes.data() + source_element_bytes.size_bytes(), - dest_element_bytes.data()); + const auto source_element_bytes = source_bytes.subspan(element_offset, element_size_in_bytes); + const auto dest_element_bytes = destination_bytes.subspan(element_offset, element_size_in_bytes); + ReverseCopy(source_element_bytes.data(), + source_element_bytes.data() + source_element_bytes.size_bytes(), + dest_element_bytes.data()); } } namespace detail { -void CopyLittleEndian(size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes) { +Status CopyLittleEndian(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes) { + ORT_RETURN_IF(source_bytes.size_bytes() != destination_bytes.size_bytes(), + "source and destination buffer size mismatch"); + 
if (endian::native == endian::little) { std::memcpy(destination_bytes.data(), source_bytes.data(), source_bytes.size_bytes()); } else { SwapByteOrderCopy(element_size_in_bytes, source_bytes, destination_bytes); } + + return Status::OK(); } } // namespace detail +common::Status ReadLittleEndian(size_t element_size, + gsl::span source_bytes, + gsl::span destination_bytes) { + return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes); +} + } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/framework/endian_utils.h b/onnxruntime/core/framework/endian_utils.h index 7da16e670981e..92460679a9d5b 100644 --- a/onnxruntime/core/framework/endian_utils.h +++ b/onnxruntime/core/framework/endian_utils.h @@ -27,8 +27,9 @@ namespace utils { * @param source_bytes The source byte span. * @param destination_bytes The destination byte span. */ -void SwapByteOrderCopy( - size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes); +void SwapByteOrderCopy(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes); namespace detail { @@ -36,43 +37,45 @@ namespace detail { * Copies between two buffers where one is little-endian and the other has * native endian-ness. */ -void CopyLittleEndian( - size_t element_size_in_bytes, gsl::span source_bytes, gsl::span destination_bytes); +Status CopyLittleEndian(size_t element_size_in_bytes, + gsl::span source_bytes, + gsl::span destination_bytes); } // namespace detail /** * Reads from a little-endian source. */ +common::Status ReadLittleEndian(size_t element_size, + gsl::span source_bytes, + gsl::span destination_bytes); + +/** + * Reads from a little-endian source with check that T is trivially copyable. 
+ * @remarks Check is skipped if building with gcc v4 + */ template -common::Status ReadLittleEndian(gsl::span source_bytes, gsl::span destination) { +common::Status ReadLittleEndian(gsl::span source_bytes, gsl::span destination) { // std::is_trivially_copyable is not implemented in older versions of GCC #if !defined(__GNUC__) || __GNUC__ >= 5 static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); #endif - ORT_RETURN_IF_NOT(source_bytes.size_bytes() == destination.size_bytes(), - "source and destination buffer size mismatch"); - const auto destination_bytes = gsl::make_span( - reinterpret_cast(destination.data()), destination.size_bytes()); - detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); - return common::Status::OK(); + const auto destination_bytes = gsl::make_span(reinterpret_cast(destination.data()), + destination.size_bytes()); + return ReadLittleEndian(sizeof(T), source_bytes, destination_bytes); } /** * Writes to a little-endian destination. 
*/ template -common::Status WriteLittleEndian(gsl::span source, gsl::span destination_bytes) { +common::Status WriteLittleEndian(gsl::span source, gsl::span destination_bytes) { // std::is_trivially_copyable is not implemented in older versions of GCC #if !defined(__GNUC__) || __GNUC__ >= 5 static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); #endif - ORT_RETURN_IF_NOT(source.size_bytes() == destination_bytes.size_bytes(), - "source and destination buffer size mismatch"); - const auto source_bytes = gsl::make_span( - reinterpret_cast(source.data()), source.size_bytes()); - detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); - return common::Status::OK(); + const auto source_bytes = gsl::make_span(reinterpret_cast(source.data()), source.size_bytes()); + return detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes); } } // namespace utils diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index e9a39e5f74579..0e6f63cae91e6 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -97,29 +97,44 @@ std::vector GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorP } // This function doesn't support string tensors -template -static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size, - /*out*/ T* p_data) { +static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len, + size_t expected_num_elements, size_t element_size, + /*out*/ unsigned char* p_data) { + auto src = gsl::make_span(static_cast(raw_data), raw_data_len); + auto dst = gsl::make_span(p_data, expected_num_elements * element_size); + size_t expected_size_in_bytes; - if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) { + if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_num_elements, element_size, 
&expected_size_in_bytes)) { return Status(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::INVALID_ARGUMENT, "size overflow"); } - if (raw_data_length != expected_size_in_bytes) + + if (dst.size_bytes() != expected_size_in_bytes) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "UnpackTensor: the pre-allocated size does not match the raw data size, expected ", - expected_size_in_bytes, ", got ", raw_data_length); + expected_size_in_bytes, ", got ", dst.size_bytes()); + } - const char* const raw_data_bytes = reinterpret_cast(raw_data); - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian( - gsl::make_span(raw_data_bytes, raw_data_length), gsl::make_span(p_data, expected_size))); - return Status::OK(); + // ReadLittleEndian checks src and dst buffers are the same size + return onnxruntime::utils::ReadLittleEndian(element_size, src, dst); +} + +template +Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_len, size_t expected_num_elements, + /*out*/ T* p_data) { + // std::is_trivially_copyable is not implemented in older versions of GCC +#if !defined(__GNUC__) || __GNUC__ >= 5 + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); +#endif + + return UnpackTensorWithRawDataImpl(raw_data, raw_data_len, expected_num_elements, sizeof(T), + reinterpret_cast(p_data)); } static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, std::basic_string& external_file_path, onnxruntime::FileOffsetType& file_offset, - SafeInt& tensor_data_length) { + SafeInt& tensor_byte_size) { ORT_RETURN_IF_NOT(onnxruntime::utils::HasExternalData(tensor_proto), "Tensor does not have external data to read from."); @@ -137,16 +152,12 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot file_offset = external_data_info->GetOffset(); - ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>( - tensor_proto, &tensor_data_length)); + 
ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size)); const size_t external_data_length = external_data_info->GetLength(); - ORT_RETURN_IF_NOT( - external_data_length == 0 || - external_data_length == tensor_data_length, - "TensorProto external data size mismatch. ", - "Computed size: ", *&tensor_data_length, - ", external_data.length: ", external_data_length); + ORT_RETURN_IF_NOT(external_data_length == 0 || external_data_length == tensor_byte_size, + "TensorProto external data size mismatch. Computed size: ", *&tensor_byte_size, + ", external_data.length: ", external_data_length); return Status::OK(); } @@ -157,8 +168,8 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // This function does not unpack string_data of an initializer tensor static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, - std::unique_ptr& unpacked_tensor, - SafeInt& tensor_data_length) { + std::unique_ptr& unpacked_tensor, + SafeInt& tensor_byte_size) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; ORT_RETURN_IF_ERROR(GetExternalDataInfo( @@ -166,14 +177,14 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso tensor_proto_dir, external_file_path, file_offset, - tensor_data_length)); + tensor_byte_size)); - unpacked_tensor.reset(new uint8_t[*&tensor_data_length]); + unpacked_tensor.reset(new unsigned char[*&tensor_byte_size]); ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer( external_file_path.c_str(), file_offset, - tensor_data_length, - gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_data_length))); + tensor_byte_size, + gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_byte_size))); return Status::OK(); } @@ -182,91 +193,108 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso namespace 
onnxruntime { namespace utils { #if !defined(ORT_MINIMAL_BUILD) -#define DEFINE_UNPACK_EXTERNAL_TENSOR(T) \ - template <> \ - Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ - const ORTCHAR_T* tensor_proto_dir, size_t expected_size, \ - /*out*/ T* p_data) { \ - ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ - \ - std::unique_ptr unpacked_tensor; \ - SafeInt tensor_byte_size = 0; \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( \ - tensor, \ - tensor_proto_dir, \ - unpacked_tensor, \ - tensor_byte_size)); \ - \ - size_t element_count = tensor_byte_size / sizeof(T); \ - ORT_RETURN_IF_NOT(expected_size == element_count, "Expected data size does not match the actual external data size."); \ - ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian( \ - gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_byte_size), \ - gsl::make_span(p_data, expected_size))); \ - \ - return Status::OK(); \ - } +static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor, + const ORTCHAR_T* tensor_proto_dir, + size_t expected_num_elements, size_t element_size, + /*out*/ unsigned char* p_data) { + ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); + + std::unique_ptr unpacked_tensor; + SafeInt tensor_byte_size = 0; + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor, tensor_byte_size)); + + // ReadLittleEndian checks src and dst buffers are the same size + auto src_span = gsl::make_span(unpacked_tensor.get(), tensor_byte_size); + auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size); + + return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span); +} + +template +Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, + const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements, + /*out*/ T* p_data) { + // std::is_trivially_copyable is not implemented in older versions of GCC +#if 
!defined(__GNUC__) || __GNUC__ >= 5 + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); +#endif + + return UnpackTensorWithExternalDataImpl(tensor, tensor_proto_dir, expected_num_elements, sizeof(T), + reinterpret_cast(p_data)); +} -DEFINE_UNPACK_EXTERNAL_TENSOR(float) -DEFINE_UNPACK_EXTERNAL_TENSOR(double) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint8_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int8_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int16_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint16_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int32_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(int64_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint64_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(uint32_t) -DEFINE_UNPACK_EXTERNAL_TENSOR(bool) -DEFINE_UNPACK_EXTERNAL_TENSOR(MLFloat16) -DEFINE_UNPACK_EXTERNAL_TENSOR(BFloat16) +#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \ + template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const ORTCHAR_T*, size_t, type*); + +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint8_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int8_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int16_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint16_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int32_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int64_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint64_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint32_t) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(bool) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(MLFloat16) +INSTANTIATE_UNPACK_EXTERNAL_TENSOR(BFloat16) template <> Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& /*tensor*/, - const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_size*/, + const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_num_elements*/, /*out*/ std::string* /*p_data*/) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "External data type cannot be STRING."); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data type cannot be STRING."); } #endif 
//!defined(ORT_MINIMAL_BUILD) -// This macro doesn't work for Float16/bool/string tensors -#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size) \ - template <> \ - Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \ - /*out*/ T* p_data, size_t expected_size) { \ - if (nullptr == p_data) { \ - const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size(); \ - if (size == 0) return Status::OK(); \ - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ - } \ - if (nullptr == p_data || Type != tensor.data_type()) { \ - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ - } \ - if (raw_data != nullptr) { \ - return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); \ - } \ - if (static_cast(tensor.field_size()) != expected_size) \ - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "corrupted protobuf data: tensor shape size(", \ - expected_size, ") does not match the data size(", tensor.field_size(), ") in proto"); \ - auto& data = tensor.field_name(); \ - for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ - *p_data++ = *reinterpret_cast(data_iter); \ - return Status::OK(); \ +// implementation of type specific unpack of data contained within the TensorProto +template +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ T* p_data, size_t expected_num_elements); + +#define DEFINE_UNPACK_TENSOR_IMPL(T, Type, field_name, field_size) \ + template <> \ + Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \ + /*out*/ T* p_data, size_t expected_num_elements) { \ + if (nullptr == p_data) { \ + const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.field_size(); \ + if (size == 0) return Status::OK(); \ + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ + } \ + if (nullptr == p_data || Type != tensor.data_type()) { \ + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ + } \ + if (raw_data != nullptr) { \ + return UnpackTensorWithRawData(raw_data, raw_data_len, expected_num_elements, p_data); \ + } \ + if (static_cast(tensor.field_size()) != expected_num_elements) \ + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, \ + "corrupted protobuf data: tensor shape size(", expected_num_elements, \ + ") does not match the data size(", tensor.field_size(), ") in proto"); \ + auto& data = tensor.field_name(); \ + for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ + *p_data++ = *reinterpret_cast(data_iter); \ + return Status::OK(); \ } // TODO: complex64 complex128 -DEFINE_UNPACK_TENSOR(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size) -DEFINE_UNPACK_TENSOR(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size); -DEFINE_UNPACK_TENSOR(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size) -DEFINE_UNPACK_TENSOR(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) -DEFINE_UNPACK_TENSOR(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) - -// doesn't support raw data 
+DEFINE_UNPACK_TENSOR_IMPL(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size) +DEFINE_UNPACK_TENSOR_IMPL(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size); +DEFINE_UNPACK_TENSOR_IMPL(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR_IMPL(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) +DEFINE_UNPACK_TENSOR_IMPL(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) + +// +// Specializations of UnpackTensor that need custom handling for the input type +// + +// UnpackTensor. 
Note: doesn't support raw data template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, /*out*/ std::string* p_data, size_t expected_size) { @@ -289,6 +317,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw return Status::OK(); } + +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, size_t expected_size) { @@ -314,6 +344,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } + +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ MLFloat16* p_data, size_t expected_size) { @@ -346,6 +378,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } +// UnpackTensor template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ BFloat16* p_data, size_t expected_size) { @@ -380,6 +413,49 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return Status::OK(); } +// UnpackTensor from raw data, external data or the type specific data field. +// Uses the model path to construct the full path for loading external data. In case when model_path is empty +// it uses current directory. +template +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + /*out*/ T* p_data, size_t expected_num_elements) { +#if !defined(ORT_MINIMAL_BUILD) + if (HasExternalData(tensor)) { + return UnpackTensorWithExternalData( + tensor, + model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), + expected_num_elements, + p_data); + } +#else + ORT_UNUSED_PARAMETER(model_path); + ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build."); +#endif + + return HasRawData(tensor) + ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_num_elements) + : UnpackTensor(tensor, nullptr, 0, p_data, expected_num_elements); +} + +// instantiate the UnpackTensor variant that supports external data +#define INSTANTIATE_UNPACK_TENSOR(type) \ + template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const Path&, type* p_data, size_t); + +INSTANTIATE_UNPACK_TENSOR(float) +INSTANTIATE_UNPACK_TENSOR(double) +INSTANTIATE_UNPACK_TENSOR(uint8_t) +INSTANTIATE_UNPACK_TENSOR(int8_t) +INSTANTIATE_UNPACK_TENSOR(int16_t) +INSTANTIATE_UNPACK_TENSOR(uint16_t) +INSTANTIATE_UNPACK_TENSOR(int32_t) +INSTANTIATE_UNPACK_TENSOR(int64_t) +INSTANTIATE_UNPACK_TENSOR(uint64_t) +INSTANTIATE_UNPACK_TENSOR(uint32_t) +INSTANTIATE_UNPACK_TENSOR(bool) +INSTANTIATE_UNPACK_TENSOR(MLFloat16) +INSTANTIATE_UNPACK_TENSOR(BFloat16) +INSTANTIATE_UNPACK_TENSOR(std::string) + #define CASE_PROTO_TRACE(X, Y) \ case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ @@ -472,12 +548,6 @@ static void UnInitTensor(void* param) noexcept { delete p; } -#define CASE_PROTO(X, Y) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ - ORT_RETURN_IF_ERROR( \ - UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, static_cast(tensor_size))); \ - break; - class AutoDelete { public: OrtCallback d{nullptr, nullptr}; @@ -530,6 +600,14 @@ static void MoveOrtCallback(OrtCallback& from, OrtCallback& to) { from.f = nullptr; from.param = nullptr; } + +#define CASE_PROTO(X, Y) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ + 
ORT_RETURN_IF_ERROR( \ + UnpackTensor(tensor_proto, raw_data, raw_data_len, \ + (Y*)preallocated, static_cast(tensor_size))); \ + break; + #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 6239) @@ -632,7 +710,8 @@ Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* model_path, deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type}; } ORT_RETURN_IF_ERROR(UnpackTensor(tensor_proto, raw_data, raw_data_len, - (std::string*)preallocated, static_cast(tensor_size))); + static_cast(preallocated), + static_cast(tensor_size))); break; default: { std::ostringstream ostr; @@ -1034,41 +1113,42 @@ template common::Status GetSizeInBytesFromTensorProto(const ONN size_t* out); template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); -#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ - if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( \ - initializer, \ - model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(), \ - unpacked_tensor, \ - tensor_byte_size)); \ - tensor_data_length = tensor_byte_size; \ - return Status::OK(); \ - } else { \ - size_t element_count = 0; \ - if (initializer.has_raw_data()) { \ - tensor_byte_size = initializer.raw_data().size(); \ - element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ - } else { \ - element_count = initializer.DATA_SIZE(); \ - tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ - } \ - tensor_data_length = tensor_byte_size; \ - unpacked_tensor.reset(new uint8_t[tensor_data_length]); \ - return onnxruntime::utils::UnpackTensor( \ - initializer, \ - initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ - initializer.has_raw_data() ? 
initializer.raw_data().size() : 0, \ - reinterpret_cast(unpacked_tensor.get()), element_count); \ - } \ - break; \ +#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + size_t element_count = 0; \ + if (initializer.has_raw_data()) { \ + tensor_byte_size = initializer.raw_data().size(); \ + element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ + } else { \ + element_count = initializer.DATA_SIZE(); \ + tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ + } \ + tensor_byte_size_out = tensor_byte_size; \ + unpacked_tensor.reset(new unsigned char[tensor_byte_size_out]); \ + return onnxruntime::utils::UnpackTensor( \ + initializer, \ + initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ + initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ + reinterpret_cast(unpacked_tensor.get()), element_count); \ + break; \ } Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, - size_t& tensor_data_length) { - SafeInt tensor_byte_size = tensor_data_length; + std::unique_ptr& unpacked_tensor, + size_t& tensor_byte_size_out) { + SafeInt tensor_byte_size; + + if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( + initializer, + model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), + unpacked_tensor, + tensor_byte_size)); + tensor_byte_size_out = tensor_byte_size; + return Status::OK(); + } + switch (initializer.data_type()) { CASE_UNPACK(FLOAT, float, float_data_size); CASE_UNPACK(DOUBLE, double, double_data_size); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index da00e8eab5bb7..1f3f0c844aafb 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -238,16 +238,6 @@ inline bool HasName(const ONNX_NAMESPACE::NodeProto& node_proto) { return node_proto.has_name(); } -#if !defined(ORT_MINIMAL_BUILD) -// Unpack tensor which contains external data. Uses the tensor_proto_dir to construct the full path for external data. -// If tensor_proto_dir == nullptr then uses the current directory instead. -// This function does not unpack string_data of a tensor -template -Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, - const ORTCHAR_T* tensor_proto_dir, size_t expected_size, - /*out*/ T* p_data); -#endif // !defined(ORT_MINIMAL_BUILD) - // UnpackTensor from raw data or the type specific data field. Does not handle external data. // If the tensor does not contain raw data then raw_data should be nullptr and raw_data_len should be 0. template @@ -258,37 +248,21 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d // Uses the model path to construct the full path for loading external data. In case when model_path is empty // it uses current directory. template -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, /*out*/ T* p_data, size_t expected_size) { -#if !defined(ORT_MINIMAL_BUILD) - if (HasExternalData(tensor)) { - return UnpackTensorWithExternalData( - tensor, - model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), - expected_size, - p_data); - } -#else - ORT_UNUSED_PARAMETER(model_path); - ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build."); -#endif - - return HasRawData(tensor) - ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_size) - : UnpackTensor(tensor, nullptr, 0, p_data, expected_size); -} +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + /*out*/ T* p_data, size_t expected_size); /** * Unpack the data from an initializer tensor * Please note, this function does not unpack string_data of an initializer tensor * @param initializer given initializer tensor * @param initializer_dir model_path to construct external data dir path. When this is empty, current dir is used. - * @param unpacked_tensor the data from the initaizlier in uint8_t* form + * @param unpacked_tensor the data from the initializer in byte form * @param tensor_byte_size the byte size of the unpacked_tensor * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, + std::unique_ptr& unpacked_tensor, size_t& tensor_byte_size) ORT_MUST_USE_RESULT; } // namespace utils diff --git a/onnxruntime/test/framework/endian_test.cc b/onnxruntime/test/framework/endian_test.cc index fce9741925862..938adf2cd3db5 100644 --- a/onnxruntime/test/framework/endian_test.cc +++ b/onnxruntime/test/framework/endian_test.cc @@ -14,7 +14,7 @@ namespace test { TEST(EndianTest, EndiannessDetection) { const uint16_t test_value = 0x1234; - const char* test_value_first_byte = reinterpret_cast(&test_value); + const unsigned char* test_value_first_byte = reinterpret_cast(&test_value); if (endian::native == endian::little) { EXPECT_EQ(*test_value_first_byte, 0x34); } else if (endian::native == 
endian::big) { @@ -23,13 +23,13 @@ TEST(EndianTest, EndiannessDetection) { } TEST(EndianTest, SwapByteOrderCopy) { - const auto src = std::vector{ + const auto src = std::vector{ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'}; - auto result = std::vector(src.size()); + auto result = std::vector(src.size()); { SwapByteOrderCopy(3, gsl::make_span(src), gsl::make_span(result)); - const auto expected = std::vector{ + const auto expected = std::vector{ 'c', 'b', 'a', 'f', 'e', 'd', 'i', 'h', 'g', @@ -39,7 +39,7 @@ TEST(EndianTest, SwapByteOrderCopy) { { SwapByteOrderCopy(4, gsl::make_span(src), gsl::make_span(result)); - const auto expected = std::vector{ + const auto expected = std::vector{ 'd', 'c', 'b', 'a', 'h', 'g', 'f', 'e', 'l', 'k', 'j', 'i'}; diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index a048df52fcc26..39f0f6d6f1b2a 100644 --- a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -76,8 +76,9 @@ TEST(TensorProtoUtilsTest, UnpackTensor) { EXPECT_FALSE(status.IsOK()); } +namespace { template -static std::vector CreateValues() { +std::vector CreateValues() { return {1, 2, 3, 4}; } @@ -86,17 +87,49 @@ std::vector CreateValues() { return {"one", "two", "three", "four"}; } +template <> +std::vector CreateValues() { + return {true, false, false, true}; +} + +template <> +std::vector CreateValues() { + return {MLFloat16(0.f), MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f)}; +} + +template <> +std::vector CreateValues() { + return {BFloat16(0.f), BFloat16(1.f), BFloat16(2.f), BFloat16(3.f)}; +} + +template +void WriteDataToFile(FILE* fp, const std::vector& test_data) { + size_t size_in_bytes = test_data.size() * sizeof(T); + ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp)); +} + +std::unique_ptr BoolDataFromVector(const std::vector& test_data) { + auto arr = onnxruntime::make_unique(test_data.size()); + 
std::copy(std::begin(test_data), std::end(test_data), arr.get()); + return arr; +} + +// work around std::vector storing data in bits +template <> +void WriteDataToFile(FILE* fp, const std::vector& test_data) { + auto arr = BoolDataFromVector(test_data); + size_t size_in_bytes = test_data.size() * sizeof(bool); + ASSERT_EQ(size_in_bytes, fwrite(arr.get(), 1, size_in_bytes, fp)); +} + template -static void CreateTensorWithExternalData( - TensorProto_DataType type, - const std::vector& test_data, - std::basic_string& filename, - TensorProto& tensor_proto) { +void CreateTensorWithExternalData(TensorProto_DataType type, const std::vector& test_data, + std::basic_string& filename, + TensorProto& tensor_proto) { // Create external data FILE* fp; CreateTestFile(fp, filename); - size_t size_in_bytes = test_data.size() * sizeof(T); - ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp)); + WriteDataToFile(fp, test_data); ASSERT_EQ(0, fclose(fp)); // set the tensor_proto to reference this external data @@ -109,15 +142,7 @@ static void CreateTensorWithExternalData( } template -static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) { - // Create external data - std::basic_string filename(ORT_TSTR("tensor_XXXXXX")); - TensorProto tensor_proto; - auto test_data = CreateValues(); - CreateTensorWithExternalData(type, test_data, filename, tensor_proto); - std::unique_ptr file_deleter(const_cast(filename.c_str()), - DeleteFileFromDisk); - +void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, const std::vector& test_data) { // Unpack tensor with external data std::vector val(test_data.size()); auto st = utils::UnpackTensor(tensor_proto, model_path, val.data(), test_data.size()); @@ -125,15 +150,45 @@ static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& mode // Validate data for (size_t i = 0; i < test_data.size(); i++) { - ASSERT_EQ(val[i], test_data[i]); + ASSERT_TRUE(val[i] == 
test_data[i]); // need to use ASSERT_TRUE with '==' to handle MFLoat16 and BFloat16 + } +} + +template <> +void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, + const std::vector& test_data) { + // Unpack tensor with external data + auto arr = onnxruntime::make_unique(test_data.size()); + auto st = utils::UnpackTensor(tensor_proto, model_path, arr.get(), test_data.size()); + ASSERT_TRUE(st.IsOK()) << st.ErrorMessage(); + + // Validate data + for (size_t i = 0; i < test_data.size(); i++) { + ASSERT_TRUE(arr[i] == test_data[i]); } } +template +void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) { + // Create external data + std::basic_string filename(ORT_TSTR("tensor_XXXXXX")); + TensorProto tensor_proto; + auto test_data = CreateValues(); + CreateTensorWithExternalData(type, test_data, filename, tensor_proto); + std::unique_ptr file_deleter(const_cast(filename.c_str()), + DeleteFileFromDisk); + UnpackAndValidate(tensor_proto, model_path, test_data); +} +} // namespace TEST(TensorProtoUtilsTest, UnpackTensorWithExternalData) { Path model_path; TestUnpackExternalTensor(TensorProto_DataType_FLOAT, model_path); TestUnpackExternalTensor(TensorProto_DataType_DOUBLE, model_path); TestUnpackExternalTensor(TensorProto_DataType_INT32, model_path); + TestUnpackExternalTensor(TensorProto_DataType_INT8, model_path); + TestUnpackExternalTensor(TensorProto_DataType_FLOAT16, model_path); + TestUnpackExternalTensor(TensorProto_DataType_BFLOAT16, model_path); + TestUnpackExternalTensor(TensorProto_DataType_BOOL, model_path); } template diff --git a/onnxruntime/test/optimizer/initializer_test.cc b/onnxruntime/test/optimizer/initializer_test.cc index 3e7c4eb9de46f..eaaad9ad0997c 100644 --- a/onnxruntime/test/optimizer/initializer_test.cc +++ b/onnxruntime/test/optimizer/initializer_test.cc @@ -21,18 +21,17 @@ namespace test { namespace { template Status WriteExternalDataFile(gsl::span data, const PathString& path, 
ScopedFileDeleter& file_deleter) { - std::vector data_bytes(data.size_bytes()); + std::vector data_bytes(data.size_bytes()); ORT_RETURN_IF_ERROR(onnxruntime::utils::WriteLittleEndian(data, gsl::make_span(data_bytes))); std::ofstream out{path, std::ios::binary | std::ios::trunc}; - ORT_RETURN_IF_NOT(out && out.write(data_bytes.data(), data_bytes.size()), + ORT_RETURN_IF_NOT(out && out.write(reinterpret_cast(data_bytes.data()), data_bytes.size()), "out && out.write(data_bytes.data(), data_bytes.size()) was false"); file_deleter = ScopedFileDeleter{path}; return Status::OK(); } -void SetTensorProtoExternalData( - const std::string& key, const std::string& value, - ONNX_NAMESPACE::TensorProto& tensor_proto) { +void SetTensorProtoExternalData(const std::string& key, const std::string& value, + ONNX_NAMESPACE::TensorProto& tensor_proto) { auto* external_data = tensor_proto.mutable_external_data(); auto kvp_it = std::find_if( external_data->begin(), external_data->end(),