Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 10 additions & 16 deletions include/onnxruntime/core/graph/model_saving_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;

// These options affect how the model initializers are written to the external file.
// This includes options to align external initializer offset.
// For models running on CPU, ORT will try to use mmap to load external
// initializers. To use mmap, external initializer need to be offset aligned.
// ORT will try to use mmap to load external initializers.
//
// ORT saves external initializers into single data file, each initializer is
// accessed with offset(start position of initializer) and length(byte length of
// initializer) of the data file. To use mmap, each offset need to be aligned
// which means offset need to divisible by allocation granularity(64KB for
// windows and 4K for other OSes). With align_offset to true, ORT will align
// offset for large initializer when save ONNX model with external data file.
// initializer) of the data file. With align_offset set to true, ORT will align
// the offset of each large initializer (larger than align_threshold)
// when saving an ONNX model with an external data file. Offsets are aligned to
// the on_disk_alignment value.
struct ModelSavingOptions {
explicit ModelSavingOptions(size_t size_threshold)
: initializer_size_threshold(size_threshold) {}

// Minimal initializer size in bytes to be externalized on disk
size_t initializer_size_threshold;
// Offset will always be page aligned and allocation granularity aligned for
// mmap support. This is done by padding previous tensor data with zeros
// keeping same length.
// Offset will always be aligned for mmap support.
// This is done by padding previous tensor data with zeros keeping same length.
bool align_offset = false;
// Alignment threshold for size of data.
// Having a low threshold will waste file space for small initializers.
// A tensor is force-aligned only when its data size is > align_threshold.
// Defaults to 1MB.
int64_t align_threshold = 1048576;
// The allocation Granularity for mmap() support.
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
#ifdef _WIN32
int64_t allocation_granularity = 65536;
#else
int64_t allocation_granularity = 4096;
#endif
// Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.
int64_t on_disk_alignment = 4096;
// Force embedding of all external initializers into the ONNX file.
// Used for EPContext model generation when some nodes fall back to CPU and depend on external data.
bool force_embed_external_ini = false;
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/tensor_external_data_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void ExternalDataInfo::SetExternalLocationToProto(const std::filesystem::path& e
std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(
const PrepackedWeightsForGraph& prepacked_for_graph,
const InlinedHashSet<std::string>& blob_keys, bool align,
int64_t align_threshold, int64_t allocation_granularity,
int64_t align_threshold, int64_t on_disk_alignment,
std::ostream& os, int64_t& external_offset, ::ONNX_NAMESPACE::TensorProto& proto) {
size_t key_count = 0;
for (const auto& key : blob_keys) {
Expand All @@ -120,7 +120,7 @@ std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(
const auto size_in_bytes = prepacked_weights->buffer_sizes_[i];
if (align && static_cast<int64_t>(size_in_bytes) > align_threshold) {
// return early on error
if (!AlignAndPad(os, allocation_granularity, external_offset)) {
if (!AlignAndPad(os, on_disk_alignment, external_offset)) {
return os;
}
}
Expand Down
10 changes: 4 additions & 6 deletions onnxruntime/core/framework/tensor_external_data_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,13 @@ class ExternalDataInfo {
size_t tensor_bytes_size,
::ONNX_NAMESPACE::TensorProto& proto);

// Pads the output with zeros according to the specified allocation_granularity
// Pads the output with zeros according to the specified alignment_factor
// It updates external_offset for alignment.
// Padding must be written before the actual tensor data because offset alignment happens at the start of
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// large tensors (offset need to be page aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
static std::ostream& AlignAndPad(std::ostream& stream, int64_t allocation_granularity, int64_t& external_offset) {
// Align to the larger of the page size or the allocation granularity
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
static std::ostream& AlignAndPad(std::ostream& stream, int64_t alignment_factor, int64_t& external_offset) {
// Round external_offset up to the next multiple of alignment_factor
SafeInt<int64_t> safe_external_offset = external_offset;
int64_t new_external_offset = ((safe_external_offset + alignment_factor - 1) / alignment_factor) *
Expand All @@ -66,7 +64,7 @@ class ExternalDataInfo {
static std::ostream& WritePrepackedToFileAndAddToProto(
const PrepackedWeightsForGraph& prepacked_for_graph,
const InlinedHashSet<std::string>& blob_keys,
bool align, int64_t align_threshold, int64_t allocation_granularity,
bool align, int64_t align_threshold, int64_t on_disk_alignment,
std::ostream& os,
int64_t& external_offset,
::ONNX_NAMESPACE::TensorProto& proto);
Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4536,14 +4536,14 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
continue;
}

// update external_offset for alignment
// update external_offset for alignment (if enabled)
// Padding must be written before the actual tensor data because offset alignment happens at the start of
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// large tensors (offset need to be page aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
model_saving_options.align_threshold) {
ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.allocation_granularity,
ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.on_disk_alignment,
external_offset),
"Failed writing external data to: ", model_external_file_path);
}
Expand Down Expand Up @@ -4576,7 +4576,7 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
auto& os = ExternalDataInfo::WritePrepackedToFileAndAddToProto(
*prepacked_weights_for_graph_, blob_keys_to_external_data,
model_saving_options.align_offset, model_saving_options.align_threshold,
model_saving_options.allocation_granularity,
model_saving_options.on_disk_alignment,
external_stream, external_offset, *output_proto);
ORT_RETURN_IF_NOT(os.good(), "Failed to write pre-packed blobs to external file");
}
Expand Down
29 changes: 14 additions & 15 deletions onnxruntime/core/platform/windows/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ limitations under the License.
#include <gsl/gsl>
#include "core/common/logging/logging.h"
#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
Expand Down Expand Up @@ -439,30 +440,28 @@ Status WindowsEnv::MapFileIntoMemory(_In_z_ const ORTCHAR_T* file_path,
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);

static const DWORD page_size = sysinfo.dwPageSize;
static const DWORD allocation_granularity = sysinfo.dwAllocationGranularity;
const FileOffsetType offset_to_page = offset % static_cast<FileOffsetType>(page_size);
const size_t mapped_length = length + static_cast<size_t>(offset_to_page);
const FileOffsetType mapped_offset = offset - offset_to_page;
if (mapped_offset % allocation_granularity != 0) {
const auto error_code = GetLastError();
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"mapped offset must be a multiple of the allocation granularity",
" , mapped_offset = ", mapped_offset,
" , allocation_granularity = ", allocation_granularity,
" , errcode = ", error_code,
" - ", std::system_category().message(error_code));
}
const FileOffsetType offset_to_granularity = offset % static_cast<FileOffsetType>(allocation_granularity);
const SIZE_T mapped_length = SafeInt<SIZE_T>(offset_to_granularity) + length;
const FileOffsetType mapped_offset = offset - offset_to_granularity;
assert((mapped_offset % allocation_granularity) == 0);

void* const mapped_base = MapViewOfFile(file_mapping_handle.get(),
FILE_MAP_READ,
static_cast<DWORD>((mapped_offset >> 32) & 0xFFFFFFFF),
static_cast<DWORD>(mapped_offset & 0xFFFFFFFF),
mapped_length);
GSL_SUPPRESS(r.11)

if (mapped_base == nullptr) {
const auto error_code = GetLastError();
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"MapViewOfFile ", ToUTF8String(Basename(file_path)),
" fail, errcode = ", error_code,
" - ", std::system_category().message(error_code));
}

mapped_memory =
MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_page,
MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_granularity,
[mapped_base](void*) {
UnmapFile(mapped_base);
}};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
size_t tensor_offset;
std::stringstream stream(entry.value());
stream >> tensor_offset;
ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.allocation_granularity == 0,
ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.on_disk_alignment == 0,
"tensor offset not align");
}
}
Expand Down
29 changes: 4 additions & 25 deletions onnxruntime/test/platform/file_io_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ TEST(FileIoTest, MapFileIntoMemory) {
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
static const auto page_size = sysinfo.dwPageSize;
static const auto allocation_granularity = sysinfo.dwAllocationGranularity;
ASSERT_GT(page_size, static_cast<DWORD>(0));

TempFilePath tmp(ORT_TSTR("map_file_test_"));
Expand All @@ -167,21 +166,10 @@ TEST(FileIoTest, MapFileIntoMemory) {
const auto offsets_and_lengths = GenerateValidOffsetLengthPairs(
0, expected_data.size(), page_size / 10);

for (const auto& offset_and_length : offsets_and_lengths) {
const auto offset = offset_and_length.first;
const auto length = offset_and_length.second;

// The offset must be a multiple of the allocation granularity
if (offset % allocation_granularity != 0) {
continue;
}

for (const auto& [offset, length] : offsets_and_lengths) {
Env::MappedMemoryPtr mapped_memory{};
auto status = Env::Default().MapFileIntoMemory(
tmp.path.c_str(), offset, length, mapped_memory);
ASSERT_TRUE(status.IsOK())
<< "MapFileIntoMemory failed for offset " << offset << " and length " << length
<< " with error: " << status.ErrorMessage();
ASSERT_STATUS_OK(Env::Default().MapFileIntoMemory(
tmp.path.c_str(), offset, length, mapped_memory));

auto mapped_span = gsl::make_span(mapped_memory.get(), length);

Expand All @@ -190,20 +178,11 @@ TEST(FileIoTest, MapFileIntoMemory) {
ASSERT_TRUE(SpanEq(mapped_span, expected_data_span));
}

{
Env::MappedMemoryPtr mapped_memory{};

// invalid - offset is not a multiple of the allocation granularity
ASSERT_FALSE(Env::Default().MapFileIntoMemory(
tmp.path.c_str(), allocation_granularity * 3 / 2, page_size / 10, mapped_memory)
.IsOK());
}

{
Env::MappedMemoryPtr mapped_memory{};

// invalid - negative offset
ASSERT_FALSE(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory).IsOK());
ASSERT_STATUS_NOT_OK(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory));
}
}
#endif
Expand Down
Loading