Skip to content

Commit ecadd5c

Browse files
committed
Start graph saving
1 parent c00d462 commit ecadd5c

File tree

14 files changed

+257
-112
lines changed

14 files changed

+257
-112
lines changed

include/onnxruntime/core/graph/graph.h

+9-32
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ namespace onnxruntime {
4141
class Graph;
4242
struct IndexedSubGraph;
4343
class Model;
44+
struct ModelSavingOptions;
4445
class OpSignature;
4546

4647
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
@@ -1153,29 +1154,6 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
11531154
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
11541155
ONNX_NAMESPACE::GraphProto ToGraphProto() const;
11551156

1156-
// Options to align external initializer offset.
1157-
// For models running on CPU, ORT will try to use mmap to load external initializers.
1158-
// To use mmap, external initializer need to be offset aligned.
1159-
// ORT saves external initializers into signle data file, each initializer is accessed with
1160-
// offset(start position of initializer) and length(byte length of initializer) of the data file.
1161-
// To use mmap, each offset need to be aligned which means offset need to divisible by
1162-
// allocation granularity(64KB for windows and 4K for other OSes).
1163-
// With align_offset to true, ORT will align offset for large initializer when
1164-
// save ONNX model with external data file.
1165-
struct OffsetAlignmentInfo {
1166-
// Offset will always be page aligned and allocation granularity aligned for mmap support.
1167-
// This is done by padding previous tensor data with zeros keeping same length.
1168-
bool align_offset = false;
1169-
// Alignment threshold for size of data.
1170-
// Having a low threshold will waste file space for small initializers.
1171-
// Only when tensor's data size is > the page_align_threshold it will be force aligned.
1172-
// Default to 1MB.
1173-
int64_t align_threshold = 1048576;
1174-
// The allocation Granularity for mmap() support.
1175-
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
1176-
int64_t allocation_granularity = 65536;
1177-
};
1178-
11791157
/** Gets the GraphProto representation of this Graph
11801158
@param external_file_path File path of the binary file to use for initializers.
11811159
@param model_file_path path of the model file.
@@ -1186,15 +1164,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
11861164
*/
11871165
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
11881166
const std::filesystem::path& model_file_path,
1189-
size_t initializer_size_threshold,
1190-
const OffsetAlignmentInfo& align_info) const;
1191-
1192-
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
1193-
const std::filesystem::path& model_file_path,
1194-
size_t initializer_size_threshold) const {
1195-
OffsetAlignmentInfo default_options;
1196-
return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options);
1197-
}
1167+
const ModelSavingOptions& model_saving_options) const;
11981168

11991169
/** Gets the ISchemaRegistry instances being used with this Graph. */
12001170
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
@@ -1519,6 +1489,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
15191489
Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
15201490
std::optional<std::string_view> new_name);
15211491

1492+
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitiallizersImpl(const std::filesystem::path& model_path,
1493+
const std::filesystem::path& external_file_path,
1494+
const ModelSavingOptions& model_saving_options,
1495+
ONNX_NAMESPACE::GraphProto& graph_proto,
1496+
std::ostream& external_stream,
1497+
int64_t& external_offset) const;
1498+
15221499
#endif
15231500

15241501
Version IrVersion() const noexcept {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#pragma once
5+
6+
namespace onnxruntime {
7+
8+
class PrepackedForSerialization;
9+
10+
// These options affect how the model initializers are saved.
11+
// This includes options to align external initializer offsets.
12+
// For models running on CPU, ORT will try to use mmap to load external
13+
// initializers. To use mmap, external initializers need to be offset aligned.
14+
// ORT saves external initializers into a single data file; each initializer is
15+
// accessed with offset (start position of initializer) and length (byte length of
16+
// initializer) in the data file. To use mmap, each offset needs to be aligned,
17+
// which means the offset needs to be divisible by the allocation granularity (64KB
18+
// for Windows and 4KB for other OSes). With align_offset set to true, ORT will align
19+
// the offset of large initializers when saving an ONNX model with an external data file.
20+
struct ModelSavingOptions {
21+
explicit ModelSavingOptions(size_t size_threshold)
22+
: initializer_size_threshold(size_threshold) {}
23+
24+
// Minimal initializer size in bytes to be externalized on disk
25+
size_t initializer_size_threshold;
26+
// Offset will always be page aligned and allocation granularity aligned for
27+
// mmap support. This is done by padding previous tensor data with zeros
28+
// keeping same length.
29+
bool align_offset = false;
30+
// Alignment threshold for size of data.
31+
// Having a low threshold will waste file space for small initializers.
32+
// Only when a tensor's data size is > align_threshold will it be force
33+
// aligned. Defaults to 1MB.
34+
int64_t align_threshold = 1048576;
35+
// The allocation Granularity for mmap() support.
36+
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
37+
int64_t allocation_granularity = 65536;
38+
// Optional pointer to a container of pre-packed initializers to be
39+
// embedded into the external initializers, so they can also be loaded
40+
// from disk.
41+
const PrepackedForSerialization* prepacked_for_save = nullptr;
42+
};
43+
44+
}

onnxruntime/core/framework/session_state.h

+4
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,10 @@ class SessionState {
374374
void SetSaveModeForPrepacks(bool saving_model,
375375
bool saving_ort_format);
376376

377+
const PrepackedForSerialization& GetPrepackedForSerialization() const {
378+
return prepacked_weights_for_serialization_;
379+
}
380+
377381
private:
378382
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SessionState);
379383

onnxruntime/core/graph/graph.cc

+121-8
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "core/graph/indexed_sub_graph.h"
2727
#include "core/graph/model.h"
2828
#include "core/graph/model_load_utils.h"
29+
#include "core/graph/model_saving_options.h"
2930
#include "core/graph/node_attr_utils.h"
3031
#include "core/graph/op.h"
3132
#include "core/graph/runtime_optimization_record_container.h"
@@ -4085,16 +4086,128 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
40854086
return result;
40864087
}
40874088

4088-
ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
4089-
const std::filesystem::path& model_file_path,
4090-
size_t initializer_size_threshold,
4091-
const OffsetAlignmentInfo& align_info) const {
4089+
// Create a recursive function that does bottom up with subgraphs
4090+
ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl(
4091+
const std::filesystem::path& model_path,
4092+
const std::filesystem::path& external_file_path,
4093+
const ModelSavingOptions& model_saving_options,
4094+
ONNX_NAMESPACE::GraphProto& output_graph_proto,
4095+
std::ostream& external_stream,
4096+
int64_t& external_offset) const {
4097+
// update external_offset for alignment
4098+
// need to do padding before write actual tensor data as we do offset alignment at the begin of
4099+
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
4100+
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
4101+
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
4102+
auto compute_and_pad = [&external_stream](int64_t allocation_granularity, int64_t& external_offset) {
4103+
// Align to the larger of the page size or the allocation granularity
4104+
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
4105+
// Align to the next page or alloc granularity boundary
4106+
int64_t new_external_offset = static_cast<int64_t>(
4107+
std::floor((external_offset + alignment_factor - 1) / alignment_factor)) *
4108+
alignment_factor;
4109+
4110+
// padding tensor with zeros for alignment
4111+
for (int64_t index = external_offset; index != new_external_offset; ++index) {
4112+
external_stream << '\0';
4113+
}
4114+
external_offset = new_external_offset;
4115+
};
4116+
4117+
// Process subgraphs
4118+
for (const auto& node : Nodes()) {
4119+
if (node.ContainsSubgraph()) {
4120+
// Let's find this node in the output_graph_proto
4121+
auto hit = std::find_if(output_graph_proto.node().begin(),
4122+
output_graph_proto.node().end(),
4123+
[&node](const ONNX_NAMESPACE::NodeProto& proto) {
4124+
return proto.name() == node.Name();
4125+
});
4126+
ORT_ENFORCE(hit != output_graph_proto.node().end(), "Node ", node.Name(),
4127+
" not found in output_graph_proto");
4128+
auto& result_node = *hit;
4129+
for (const auto& [name, subgraph] : node.GetAttributeNameToSubgraphMap()) {
4130+
// Let's find this subgraph in the result_node
4131+
auto sub_hit = std::find_if(result_node.attribute().begin(),
4132+
result_node.attribute().end(),
4133+
[&name](const ONNX_NAMESPACE::AttributeProto& proto) {
4134+
return proto.name() == name;
4135+
});
4136+
ORT_ENFORCE(sub_hit != result_node.attribute().end(), "Subgraph ", name,
4137+
" not found in node ", node.Name());
4138+
}
4139+
}
4140+
}
4141+
4142+
// Add the initializers to the result graph.
4143+
for (const auto& initializer : graph_proto_->initializer()) {
4144+
#if !defined(DISABLE_SPARSE_TENSORS)
4145+
if (IsSparseInitializer(initializer.name())) {
4146+
// Sparse tensors are added to the ONNX file.
4147+
auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
4148+
auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer);
4149+
ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse");
4150+
} else {
4151+
#endif
4152+
// Dense tensors larger than the threshold are added to the external file.
4153+
TensorProto* output_proto = output_graph_proto.add_initializer();
4154+
4155+
std::vector<uint8_t> raw_data;
4156+
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
4157+
size_t tensor_bytes_size = raw_data.size();
4158+
if (tensor_bytes_size < model_saving_options.initializer_size_threshold) {
4159+
*output_proto = initializer;
4160+
continue;
4161+
}
4162+
4163+
// update external_offset for alignment
4164+
// need to do padding before write actual tensor data as we do offset alignment at the begin of
4165+
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
4166+
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
4167+
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
4168+
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
4169+
model_saving_options.align_threshold) {
4170+
compute_and_pad(model_saving_options.allocation_granularity, external_offset);
4171+
}
4172+
4173+
if (!external_stream.write(reinterpret_cast<const char*>(raw_data.data()), tensor_bytes_size)) {
4174+
ORT_THROW("Failed to write external initializers to file: ", modified_external_file_path);
4175+
}
4176+
4177+
ExternalDataInfo::SetExternalLocationToProto(external_file_path, external_offset,
4178+
tensor_bytes_size, *output_proto);
4179+
4180+
output_proto->set_name(initializer.name());
4181+
output_proto->set_data_type(initializer.data_type());
4182+
for (int i = 0; i != initializer.dims_size(); ++i) {
4183+
output_proto->add_dims(initializer.dims(i));
4184+
}
4185+
output_proto->set_doc_string(initializer.doc_string());
4186+
4187+
external_offset += tensor_bytes_size;
4188+
4189+
const PrepackedForSerialization::Subgraph* prepacked_subgraph = nullptr;
4190+
if (model_saving_options.prepacked_for_save != nullptr) {
4191+
prepacked_subgraph = *model_saving_options.prepacked_for_save->FindOrCreateSubgraph(*this);
4192+
}
4193+
4194+
#if !defined(DISABLE_SPARSE_TENSORS)
4195+
}
4196+
#endif
4197+
}
4198+
}
4199+
4200+
ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(
4201+
const std::filesystem::path& external_file_path,
4202+
const std::filesystem::path& model_file_path,
4203+
const ModelSavingOptions& model_saving_options) const {
40924204
GraphProto result;
40934205
ToGraphProtoInternal(result);
40944206
ORT_ENFORCE(external_file_path.is_relative());
40954207
// If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could
40964208
// be empty. Else, save external data file in same directory as the model.
40974209
const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path;
4210+
const auto& model_path = ModelPath();
40984211

40994212
// Create the external file.
41004213
std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary);
@@ -4122,7 +4235,6 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
41224235
};
41234236

41244237
// Add the initializers to the result graph.
4125-
const auto& model_path = ModelPath();
41264238
#if !defined(DISABLE_SPARSE_TENSORS)
41274239
const auto sparse_end = sparse_tensor_names_.end();
41284240
#endif
@@ -4142,7 +4254,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
41424254
std::vector<uint8_t> raw_data;
41434255
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
41444256
size_t tensor_bytes_size = raw_data.size();
4145-
if (tensor_bytes_size < initializer_size_threshold) {
4257+
if (tensor_bytes_size < model_saving_options.initializer_size_threshold) {
41464258
*output_proto = initializer;
41474259
continue;
41484260
}
@@ -4152,8 +4264,9 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
41524264
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
41534265
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
41544266
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
4155-
if (align_info.align_offset && static_cast<int64_t>(tensor_bytes_size) > align_info.align_threshold) {
4156-
compute_and_pad(align_info.allocation_granularity, external_offset);
4267+
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
4268+
model_saving_options.align_threshold) {
4269+
compute_and_pad(model_saving_options.allocation_granularity, external_offset);
41574270
}
41584271

41594272
if (!external_stream.write(reinterpret_cast<const char*>(raw_data.data()), tensor_bytes_size)) {

0 commit comments

Comments
 (0)