Fix a long-standing bug in file memory mapping on Windows. #25833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

yuslepukhin merged 4 commits into main from yuslepukhin/fix_memapping_windows

Aug 27, 2025

include/onnxruntime/core/graph/model_saving_options.h

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;
  
    // These options affect how the model initializers are written to the external file.

    // This includes options to align external initializer offset.

    // For models running on CPU, ORT will try to use mmap to load external

    // initializers. To use mmap, external initializer need to be offset aligned.

    // ORT will try to use mmap to load external initializers.

    //

    // ORT saves external initializers into single data file, each initializer is

    // accessed with offset(start position of initializer) and length(byte length of

    // initializer) of the data file. To use mmap, each offset need to be aligned

    // which means offset need to divisible by allocation granularity(64KB for

    // windows and 4K for other OSes). With align_offset to true, ORT will align

    // offset for large initializer when save ONNX model with external data file.

    // initializer) of the data file. With align_offset set to true, ORT will align

    // the offset of each large initializer (larger than align_threshold)

    // when saving an ONNX model with an external data file. It will align them to

    // the on_disk_alignment value.

    struct ModelSavingOptions {

      explicit ModelSavingOptions(size_t size_threshold)

          : initializer_size_threshold(size_threshold) {}

      // Minimal initializer size in bytes to be externalized on disk

      size_t initializer_size_threshold;

      // Offset will always be page aligned and allocation granularity aligned for

      // mmap support. This is done by padding previous tensor data with zeros

      // keeping same length.

      // Offset will always be aligned for mmap support.

      // This is done by padding previous tensor data with zeros keeping same length.

      bool align_offset = false;

      // Alignment threshold for size of data.

      // Having a low threshold will waste file space for small initializers.

      // Only when a tensor's data size is > align_threshold will it be force

      // aligned. Default to 1MB.

      int64_t align_threshold = 1048576;

      // The allocation Granularity for mmap() support.

      // Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.

    #ifdef _WIN32

      int64_t allocation_granularity = 65536;

    #else

      int64_t allocation_granularity = 4096;

    #endif

      // Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.

      int64_t on_disk_alignment = 4096;

      // Force embed all external initializer into the Onnx file

      // Used for EPContext model generation while some nodes fallback on CPU which has external data dependency

      bool force_embed_external_ini = false;

onnxruntime/core/framework/tensor_external_data_info.cc

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -107,7 +107,7 @@ void ExternalDataInfo::SetExternalLocationToProto(const std::filesystem::path& e
  
    std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(

        const PrepackedWeightsForGraph& prepacked_for_graph,

        const InlinedHashSet<std::string>& blob_keys, bool align,

        int64_t align_threshold, int64_t allocation_granularity,

        int64_t align_threshold, int64_t on_disk_alignment,

        std::ostream& os, int64_t& external_offset, ::ONNX_NAMESPACE::TensorProto& proto) {

      size_t key_count = 0;

      for (const auto& key : blob_keys) {

    @@ -120,7 +120,7 @@ std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(
  
          const auto size_in_bytes = prepacked_weights->buffer_sizes_[i];

          if (align && static_cast<int64_t>(size_in_bytes) > align_threshold) {

            // return early on error

            if (!AlignAndPad(os, allocation_granularity, external_offset)) {

            if (!AlignAndPad(os, on_disk_alignment, external_offset)) {

              return os;

            }

          }

onnxruntime/core/framework/tensor_external_data_info.h

-Original file line number
+Diff line change
@@ Expand Up / @@ -41,15 +41,13 @@ class ExternalDataInfo { @@
                                              size_t tensor_bytes_size,
                                              ::ONNX_NAMESPACE::TensorProto& proto);
-      // Pads the output with zeros according to the specified allocation_granularity
+      // Pads the output with zeros according to the specified alignment_factor
       // It updates external_offset for alignment.
       // need to do padding before write actual tensor data as we do offset alignment at the begin of
-      // large tensors (offset need to be page aligned and allocation granularity aligned) like below:
+      // large tensors (offset need to be page aligned) like below:
       // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
       // |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
-      static std::ostream& AlignAndPad(std::ostream& stream, int64_t allocation_granularity, int64_t& external_offset) {
-        // Align to the larger of the page size or the allocation granularity
-        int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
+      static std::ostream& AlignAndPad(std::ostream& stream, int64_t alignment_factor, int64_t& external_offset) {
         // Round the offset up to the next multiple of alignment_factor
         SafeInt<int64_t> safe_external_offset = external_offset;
         int64_t new_external_offset = ((safe_external_offset + alignment_factor - 1) / alignment_factor) *
@@ Expand All / @@ -66,7 +64,7 @@ class ExternalDataInfo { @@
       static std::ostream& WritePrepackedToFileAndAddToProto(
           const PrepackedWeightsForGraph& prepacked_for_graph,
           const InlinedHashSet<std::string>& blob_keys,
-          bool align, int64_t align_threshold, int64_t allocation_granularity,
+          bool align, int64_t align_threshold, int64_t on_disk_alignment,
           std::ostream& os,
           int64_t& external_offset,
           ::ONNX_NAMESPACE::TensorProto& proto);
@@ Expand Down @@

onnxruntime/core/graph/graph.cc

-Original file line number
+Diff line change
@@ Expand Up / @@ -4536,14 +4536,14 @@ Status Graph::AddExternalInitializersToGraphProtoImpl( @@
             continue;
           }
-          // update external_offset for alignment
+          // update external_offset for alignment (if enabled)
           // need to do padding before write actual tensor data as we do offset alignment at the begin of
-          // large tensors (offset need to be page aligned and allocation granularity aligned) like below:
+          // large tensors (offset need to be page aligned) like below:
           // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
           // |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
           if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
                                                        model_saving_options.align_threshold) {
-            ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.allocation_granularity,
+            ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.on_disk_alignment,
                                                             external_offset),
                               "Failed writing external data to: ", model_external_file_path);
           }
@@ Expand Down Expand Up / @@ -4576,7 +4576,7 @@ Status Graph::AddExternalInitializersToGraphProtoImpl( @@
               auto& os = ExternalDataInfo::WritePrepackedToFileAndAddToProto(
                   *prepacked_weights_for_graph_, blob_keys_to_external_data,
                   model_saving_options.align_offset, model_saving_options.align_threshold,
-                  model_saving_options.allocation_granularity,
+                  model_saving_options.on_disk_alignment,
                   external_stream, external_offset, *output_proto);
               ORT_RETURN_IF_NOT(os.good(), "Failed to write pre-packed blobs to external file");
             }
@@ Expand Down @@

onnxruntime/core/platform/windows/env.cc

-Original file line number
+Diff line change
@@ Expand Up / @@ -29,6 +29,7 @@ limitations under the License. @@
     #include <gsl/gsl>
     #include "core/common/logging/logging.h"
     #include "core/common/narrow.h"
+    #include "core/common/safeint.h"
     #include "core/common/span_utils.h"
     #include "core/platform/env.h"
     #include "core/platform/scoped_resource.h"
@@ Expand Down Expand Up @@
       SYSTEM_INFO sysinfo;
       GetSystemInfo(&sysinfo);
-      static const DWORD page_size = sysinfo.dwPageSize;
       static const DWORD allocation_granularity = sysinfo.dwAllocationGranularity;
-      const FileOffsetType offset_to_page = offset % static_cast<FileOffsetType>(page_size);
-      const size_t mapped_length = length + static_cast<size_t>(offset_to_page);
-      const FileOffsetType mapped_offset = offset - offset_to_page;
-      if (mapped_offset % allocation_granularity != 0) {
-        const auto error_code = GetLastError();
-        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                               "mapped offset must be a multiple of the allocation granularity",
-                               " , mapped_offset = ", mapped_offset,
-                               " , allocation_granularity = ", allocation_granularity,
-                               " , errcode = ", error_code,
-                               " - ", std::system_category().message(error_code));
-      }
+      const FileOffsetType offset_to_granularity = offset % static_cast<FileOffsetType>(allocation_granularity);
+      const SIZE_T mapped_length = SafeInt<SIZE_T>(offset_to_granularity) + length;
+      const FileOffsetType mapped_offset = offset - offset_to_granularity;
+      assert((mapped_offset % allocation_granularity) == 0);
       void* const mapped_base = MapViewOfFile(file_mapping_handle.get(),
                                               FILE_MAP_READ,
                                               static_cast<DWORD>((mapped_offset >> 32) & 0xFFFFFFFF),
                                               static_cast<DWORD>(mapped_offset & 0xFFFFFFFF),
                                               mapped_length);
-      GSL_SUPPRESS(r.11)
+      if (mapped_base == nullptr) {
+        const auto error_code = GetLastError();
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+                               "MapViewOfFile ", ToUTF8String(Basename(file_path)),
+                               " fail, errcode = ", error_code,
+                               " - ", std::system_category().message(error_code));
+      }
       mapped_memory =
-          MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_page,
+          MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_granularity,
                           [mapped_base](void*) {
                             UnmapFile(mapped_base);
                           }};
@@ Expand Down @@

onnxruntime/test/framework/save_model_with_external_initializers.cc

-Original file line number
+Diff line change
@@ Expand Up @@
               size_t tensor_offset;
               std::stringstream stream(entry.value());
               stream >> tensor_offset;
-              ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.allocation_granularity == 0,
+              ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.on_disk_alignment == 0,
                                 "tensor offset not align");
             }
           }
@@ Expand Down @@

onnxruntime/test/platform/file_io_test.cc

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -157,7 +157,6 @@ TEST(FileIoTest, MapFileIntoMemory) {
  
      SYSTEM_INFO sysinfo;

      GetSystemInfo(&sysinfo);

      static const auto page_size = sysinfo.dwPageSize;

      static const auto allocation_granularity = sysinfo.dwAllocationGranularity;

      ASSERT_GT(page_size, static_cast<DWORD>(0));

      TempFilePath tmp(ORT_TSTR("map_file_test_"));

    @@ -167,21 +166,10 @@ TEST(FileIoTest, MapFileIntoMemory) {
  
      const auto offsets_and_lengths = GenerateValidOffsetLengthPairs(

          0, expected_data.size(), page_size / 10);

      for (const auto& offset_and_length : offsets_and_lengths) {

        const auto offset = offset_and_length.first;

        const auto length = offset_and_length.second;

        // The offset must be a multiple of the allocation granularity

        if (offset % allocation_granularity != 0) {

          continue;

        }

      for (const auto& [offset, length] : offsets_and_lengths) {

        Env::MappedMemoryPtr mapped_memory{};

        auto status = Env::Default().MapFileIntoMemory(

            tmp.path.c_str(), offset, length, mapped_memory);

        ASSERT_TRUE(status.IsOK())

            << "MapFileIntoMemory failed for offset " << offset << " and length " << length

            << " with error: " << status.ErrorMessage();

        ASSERT_STATUS_OK(Env::Default().MapFileIntoMemory(

            tmp.path.c_str(), offset, length, mapped_memory));

        auto mapped_span = gsl::make_span(mapped_memory.get(), length);

    @@ -190,20 +178,11 @@ TEST(FileIoTest, MapFileIntoMemory) {
  
        ASSERT_TRUE(SpanEq(mapped_span, expected_data_span));

      }

      {

        Env::MappedMemoryPtr mapped_memory{};

        // invalid - offset is not a multiple of the allocation granularity

        ASSERT_FALSE(Env::Default().MapFileIntoMemory(

                                       tmp.path.c_str(), allocation_granularity * 3 / 2, page_size / 10, mapped_memory)

                         .IsOK());

      }

      {

        Env::MappedMemoryPtr mapped_memory{};

        // invalid - negative offset

        ASSERT_FALSE(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory).IsOK());

        ASSERT_STATUS_NOT_OK(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory));

      }

    }

    #endif

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix a long-standing bug in file memory mapping on Windows. #25833

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!

Uh oh!