From 4a9daa6a41ee3422ebde736016b6c07264de5238 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Thu, 24 Jul 2025 09:11:27 -0400 Subject: [PATCH 1/4] Allow 0 byte read. Improve error message --- cpp/include/kvikio/mmap.hpp | 8 ++++--- cpp/src/mmap.cpp | 32 ++++++++++++++++++++------- cpp/tests/test_mmap.cpp | 44 +++++++++++++++++++++++++++++-------- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/cpp/include/kvikio/mmap.hpp b/cpp/include/kvikio/mmap.hpp index f5245858ef..0d9773c929 100644 --- a/cpp/include/kvikio/mmap.hpp +++ b/cpp/include/kvikio/mmap.hpp @@ -59,7 +59,6 @@ class MmapHandle { * * @exception std::out_of_range if the read region specified by `offset` and `size` is * outside the initial region specified when the mapping handle was constructed - * @exception std::invalid_argument if the size is given but is 0 * @exception std::runtime_error if the mapping handle is closed */ std::size_t validate_and_adjust_read_args(std::optional const& size, @@ -86,6 +85,11 @@ class MmapHandle { * @param initial_map_offset File offset of the mapped region * @param mode Access mode * @param map_flags Flags to be passed to the system call `mmap`. See `mmap(2)` for details + * @exception std::out_of_range if `initial_map_offset` (left bound of the mapped region) is equal + * to or greater than the file size + * @exception std::out_of_range if the sum of `initial_map_offset` and `initial_map_size` (right + * bound of the mapped region) is greater than the file size + * @exception std::invalid_argument if `initial_map_size` is given but is 0 */ MmapHandle(std::string const& file_path, std::string const& flags = "r", @@ -154,7 +158,6 @@ class MmapHandle { * * @exception std::out_of_range if the read region specified by `offset` and `size` is * outside the initial region specified when the mapping handle was constructed - * @exception std::invalid_argument if the size is given but is 0 * @exception std::runtime_error if the mapping handle is closed */ std::size_t read(void* buf, @@ -174,7 +177,6 @@ class MmapHandle { * * @exception std::out_of_range if the read region specified by `offset` and `size` is * outside the initial region specified when the mapping handle was constructed - * @exception std::invalid_argument if the size is given but is 0 * @exception std::runtime_error if the mapping handle is closed * * @note The `std::future` object's `wait()` or `get()` should not be called after the lifetime of diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 11b0416c29..5daddb07c0 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -282,8 +283,10 @@ MmapHandle::MmapHandle(std::string const& file_path, _file_size = get_file_size(_file_wrapper.fd()); if (_file_size == 0) { return; } - KVIKIO_EXPECT( - _initial_map_offset < _file_size, "Offset is past the end of file", std::out_of_range); + std::stringstream ss; + ss << "Offset must be less than the file size. initial_map_offset: " << _initial_map_offset + << ", file size: " << _file_size << "\n"; + KVIKIO_EXPECT(_initial_map_offset < _file_size, ss.str(), std::out_of_range); // An initial size of std::nullopt is a shorthand for "starting from _initial_map_offset to the // end of file". @@ -292,9 +295,11 @@ MmapHandle::MmapHandle(std::string const& file_path, KVIKIO_EXPECT( _initial_map_size > 0, "Mapped region should not be zero byte", std::invalid_argument); - KVIKIO_EXPECT(_initial_map_offset + _initial_map_size <= _file_size, - "Mapped region is past the end of file", - std::out_of_range); + + ss.str(""); + ss << "Mapped region is past the end of file. initial map offset: " << _initial_map_offset + << ", initial map size: " << _initial_map_size << ", file size: " << _file_size << "\n"; + KVIKIO_EXPECT(_initial_map_offset + _initial_map_size <= _file_size, ss.str(), std::out_of_range); auto const page_size = get_page_size(); _map_offset = detail::align_down(_initial_map_offset, page_size); @@ -401,6 +406,7 @@ std::size_t MmapHandle::read(void* buf, std::optional size, std::si KVIKIO_NVTX_FUNC_RANGE(); auto actual_size = validate_and_adjust_read_args(size, offset); + if (actual_size == 0) { return actual_size; } auto const is_dst_buf_host_mem = is_host_memory(buf); CUcontext ctx{}; @@ -420,6 +426,7 @@ std::future MmapHandle::pread(void* buf, KVIKIO_EXPECT(task_size <= defaults::bounce_buffer_size(), "bounce buffer size cannot be less than task size."); auto actual_size = validate_and_adjust_read_args(size, offset); + if (actual_size == 0) { return make_ready_future(actual_size); } auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_color); @@ -454,13 +461,22 @@ std::future MmapHandle::pread(void* buf, std::size_t MmapHandle::validate_and_adjust_read_args(std::optional const& size, std::size_t offset) { + std::stringstream ss; KVIKIO_EXPECT(!closed(), "Cannot read from a closed MmapHandle", std::runtime_error); - KVIKIO_EXPECT(offset < _file_size, "Offset is past the end of file", std::out_of_range); + + ss << "Offset is past the end of file. offset: " << offset << ", file size: " << _file_size + << "\n"; + KVIKIO_EXPECT(offset <= _file_size, ss.str(), std::out_of_range); + auto actual_size = size.has_value() ? size.value() : _file_size - offset; - KVIKIO_EXPECT(actual_size > 0, "Read size must be greater than 0", std::invalid_argument); + + ss.str(""); + ss << "Read is out of bound. offset: " << offset << ", actual size to read: " << actual_size + << ", initial map offset: " << _initial_map_offset + << ", initial map size: " << _initial_map_size << "\n"; KVIKIO_EXPECT(offset >= _initial_map_offset && offset + actual_size <= _initial_map_offset + _initial_map_size, - "Read is out of bound", + ss.str(), std::out_of_range); return actual_size; } diff --git a/cpp/tests/test_mmap.cpp b/cpp/tests/test_mmap.cpp index b52730ee74..2de89c5cd7 100644 --- a/cpp/tests/test_mmap.cpp +++ b/cpp/tests/test_mmap.cpp @@ -107,8 +107,9 @@ TEST_F(MmapTest, constructor_invalid_range) ThrowsMessage(HasSubstr("Mapped region is past the end of file"))); // init_file_offset is too large (by 1 char) - EXPECT_THAT([=] { kvikio::MmapHandle(_filepath, "r", std::nullopt, _file_size); }, - ThrowsMessage(HasSubstr("Offset is past the end of file"))); + EXPECT_THAT( + [=] { kvikio::MmapHandle(_filepath, "r", std::nullopt, _file_size); }, + ThrowsMessage(HasSubstr("Offset must be less than the file size"))); // init_size is 0 EXPECT_THAT( @@ -134,29 +135,36 @@ TEST_F(MmapTest, read_invalid_range) std::size_t const initial_file_offset{512}; std::vector out_host_buf(_file_size / sizeof(value_type), {}); - // file_offset is too large + // Right bound is too large EXPECT_THAT( [&] { kvikio::MmapHandle mmap_handle(_filepath, "r", initial_map_size, initial_file_offset); mmap_handle.read(out_host_buf.data(), initial_map_size, _file_size); }, - ThrowsMessage(HasSubstr("Offset is past the end of file"))); + ThrowsMessage(HasSubstr("Read is out of bound"))); - // file_offset is too small + // Left bound is too large EXPECT_THAT( [&] { kvikio::MmapHandle mmap_handle(_filepath, "r", initial_map_size, initial_file_offset); - mmap_handle.read(out_host_buf.data(), initial_map_size, initial_file_offset - 128); + mmap_handle.read(out_host_buf.data(), 0, initial_file_offset + initial_map_size + 1); }, ThrowsMessage(HasSubstr("Read is out of bound"))); - // size is 0 + EXPECT_THAT( + [&] { + kvikio::MmapHandle mmap_handle(_filepath, "r"); + mmap_handle.read(out_host_buf.data(), 0, _file_size + 1); + }, + ThrowsMessage(HasSubstr("Offset is past the end of file"))); + + // Left bound is too small EXPECT_THAT( [&] { kvikio::MmapHandle mmap_handle(_filepath, "r", initial_map_size, initial_file_offset); - mmap_handle.read(out_host_buf.data(), 0, initial_file_offset); + mmap_handle.read(out_host_buf.data(), initial_map_size, initial_file_offset - 128); }, - ThrowsMessage(HasSubstr("Read size must be greater than 0"))); + ThrowsMessage(HasSubstr("Read is out of bound"))); // size is too large EXPECT_THAT( @@ -167,6 +175,24 @@ TEST_F(MmapTest, read_invalid_range) ThrowsMessage(HasSubstr("Read is out of bound"))); } +TEST_F(MmapTest, read_valid_range) +{ + std::size_t const initial_map_size{1024}; + std::size_t const initial_file_offset{512}; + std::vector out_host_buf(_file_size / sizeof(value_type), {}); + + // size is 0 + EXPECT_NO_THROW({ + kvikio::MmapHandle mmap_handle(_filepath, "r", initial_map_size, initial_file_offset); + mmap_handle.read(out_host_buf.data(), 0, initial_file_offset + initial_map_size); + }); + + EXPECT_NO_THROW({ + kvikio::MmapHandle mmap_handle(_filepath, "r"); + mmap_handle.read(out_host_buf.data(), 0, _file_size); + }); +} + TEST_F(MmapTest, read_seq) { auto do_test = [&](std::size_t num_elements_to_skip, std::size_t num_elements_to_read) { From 381edd29c9f31bef23f3c2753fe73a2acf6d06c4 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Thu, 24 Jul 2025 09:56:59 -0400 Subject: [PATCH 2/4] Improve comments --- cpp/include/kvikio/mmap.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/include/kvikio/mmap.hpp b/cpp/include/kvikio/mmap.hpp index 0d9773c929..b5730727f1 100644 --- a/cpp/include/kvikio/mmap.hpp +++ b/cpp/include/kvikio/mmap.hpp @@ -80,8 +80,8 @@ class MmapHandle { * - "w": "open for writing, truncating the file first" * - "a": "open for writing, appending to the end of file if it exists" * - "+": "open for updating (reading and writing)" - * @param initial_map_size Size in bytes of the mapped region. If not specified, map the region - * starting from `initial_map_offset` to the end of file + * @param initial_map_size Size in bytes of the mapped region. Must be greater than 0. If not + * specified, map the region starting from `initial_map_offset` to the end of file * @param initial_map_offset File offset of the mapped region * @param mode Access mode * @param map_flags Flags to be passed to the system call `mmap`. See `mmap(2)` for details @@ -151,8 +151,8 @@ class MmapHandle { * destination buffer `buf` * * @param buf Address of the host or device memory (destination buffer) - * @param size Size in bytes to read. If not specified, read starts from `offset` to the end - * of file + * @param size Size in bytes to read. Can be 0 in which case nothing will be read. If not + * specified, read starts from `offset` to the end of file * @param offset File offset * @return Number of bytes that have been read * @@ -169,8 +169,8 @@ class MmapHandle { * destination buffer `buf` * * @param buf Address of the host or device memory (destination buffer) - * @param size Size in bytes to read. If not specified, read starts from `offset` to the end - * of file + * @param size Size in bytes to read. Can be 0 in which case nothing will be read. If not + * specified, read starts from `offset` to the end of file * @param offset File offset * @param task_size Size of each task in bytes * @return Future that on completion returns the size of bytes that were successfully read. From fa037cde713f84b59e5a2e86ffd148686aad4f21 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Fri, 25 Jul 2025 10:28:51 -0400 Subject: [PATCH 3/4] Address comments --- cpp/src/mmap.cpp | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 5daddb07c0..02cfc8b816 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -461,23 +461,27 @@ std::future MmapHandle::pread(void* buf, std::size_t MmapHandle::validate_and_adjust_read_args(std::optional const& size, std::size_t offset) { - std::stringstream ss; - KVIKIO_EXPECT(!closed(), "Cannot read from a closed MmapHandle", std::runtime_error); + { + std::stringstream ss; + KVIKIO_EXPECT(!closed(), "Cannot read from a closed MmapHandle", std::runtime_error); - ss << "Offset is past the end of file. offset: " << offset << ", file size: " << _file_size - << "\n"; - KVIKIO_EXPECT(offset <= _file_size, ss.str(), std::out_of_range); + ss << "Offset is past the end of file. offset: " << offset << ", file size: " << _file_size + << "\n"; + KVIKIO_EXPECT(offset <= _file_size, ss.str(), std::out_of_range); + } auto actual_size = size.has_value() ? size.value() : _file_size - offset; - ss.str(""); - ss << "Read is out of bound. offset: " << offset << ", actual size to read: " << actual_size - << ", initial map offset: " << _initial_map_offset - << ", initial map size: " << _initial_map_size << "\n"; - KVIKIO_EXPECT(offset >= _initial_map_offset && - offset + actual_size <= _initial_map_offset + _initial_map_size, - ss.str(), - std::out_of_range); + { + std::stringstream ss; + ss << "Read is out of bound. offset: " << offset << ", actual size to read: " << actual_size + << ", initial map offset: " << _initial_map_offset + << ", initial map size: " << _initial_map_size << "\n"; + KVIKIO_EXPECT(offset >= _initial_map_offset && + offset + actual_size <= _initial_map_offset + _initial_map_size, + ss.str(), + std::out_of_range); + } return actual_size; } From b43afd223ec66a040b5869b7e406c2c2c4137152 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Fri, 25 Jul 2025 10:31:00 -0400 Subject: [PATCH 4/4] Update --- cpp/src/mmap.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 02cfc8b816..671340cb09 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -283,10 +283,12 @@ MmapHandle::MmapHandle(std::string const& file_path, _file_size = get_file_size(_file_wrapper.fd()); if (_file_size == 0) { return; } - std::stringstream ss; - ss << "Offset must be less than the file size. initial_map_offset: " << _initial_map_offset - << ", file size: " << _file_size << "\n"; - KVIKIO_EXPECT(_initial_map_offset < _file_size, ss.str(), std::out_of_range); + { + std::stringstream ss; + ss << "Offset must be less than the file size. initial_map_offset: " << _initial_map_offset + << ", file size: " << _file_size << "\n"; + KVIKIO_EXPECT(_initial_map_offset < _file_size, ss.str(), std::out_of_range); + } // An initial size of std::nullopt is a shorthand for "starting from _initial_map_offset to the // end of file". @@ -296,10 +298,13 @@ MmapHandle::MmapHandle(std::string const& file_path, KVIKIO_EXPECT( _initial_map_size > 0, "Mapped region should not be zero byte", std::invalid_argument); - ss.str(""); - ss << "Mapped region is past the end of file. initial map offset: " << _initial_map_offset - << ", initial map size: " << _initial_map_size << ", file size: " << _file_size << "\n"; - KVIKIO_EXPECT(_initial_map_offset + _initial_map_size <= _file_size, ss.str(), std::out_of_range); + { + std::stringstream ss; + ss << "Mapped region is past the end of file. initial map offset: " << _initial_map_offset + << ", initial map size: " << _initial_map_size << ", file size: " << _file_size << "\n"; + KVIKIO_EXPECT( + _initial_map_offset + _initial_map_size <= _file_size, ss.str(), std::out_of_range); + } auto const page_size = get_page_size(); _map_offset = detail::align_down(_initial_map_offset, page_size);