From 4b07875505bdd5c4b635fbf821401fcd4573dffd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 17 Dec 2025 14:33:52 +0000 Subject: [PATCH] Revert #148937 (Remove initialized-bytes tracking from `BorrowedBuf` and `BorrowedCursor`) This caused several performance regressions because of existing code which uses `Read::read` and therefore requires full buffer initialization. This is particularly a problem when the same buffer is re-used for multiple read calls since this means it needs to be fully re-initialized each time. There is still some benefit to landing the API changes, but we will have to add private APIs so that the existing infrastructure can track and avoid redundant initialization. --- library/core/src/io/borrowed_buf.rs | 162 +++++++++++++++--- library/coretests/tests/io/borrowed_buf.rs | 90 ++++++++-- library/std/src/fs/tests.rs | 2 + library/std/src/io/buffered/bufreader.rs | 9 + .../std/src/io/buffered/bufreader/buffer.rs | 27 ++- library/std/src/io/buffered/tests.rs | 24 +++ library/std/src/io/copy.rs | 13 ++ library/std/src/io/mod.rs | 53 +++++- library/std/src/io/tests.rs | 9 + library/std/src/io/util.rs | 2 +- library/std/src/io/util/tests.rs | 8 + library/std/src/net/tcp/tests.rs | 2 + library/std/src/process/tests.rs | 2 + library/std/src/sys/fd/hermit.rs | 2 +- library/std/src/sys/fd/unix.rs | 4 +- library/std/src/sys/fs/solid.rs | 2 +- .../src/sys/net/connection/socket/hermit.rs | 2 +- .../src/sys/net/connection/socket/solid.rs | 2 +- .../std/src/sys/net/connection/socket/unix.rs | 2 +- .../src/sys/net/connection/socket/windows.rs | 2 +- .../std/src/sys/pal/sgx/abi/usercalls/mod.rs | 2 +- library/std/src/sys/pal/windows/handle.rs | 4 +- .../std/src/sys/process/windows/child_pipe.rs | 2 +- library/std/src/sys/stdio/zkvm.rs | 2 +- 24 files changed, 366 insertions(+), 63 deletions(-) diff --git a/library/core/src/io/borrowed_buf.rs b/library/core/src/io/borrowed_buf.rs index b765b96fd00a5..088dea7812945 100644 --- a/library/core/src/io/borrowed_buf.rs +++ b/library/core/src/io/borrowed_buf.rs @@ -2,26 +2,27 @@ use crate::fmt::{self, Debug, Formatter}; use crate::mem::{self, MaybeUninit}; +use crate::{cmp, ptr}; -/// A borrowed buffer of initially uninitialized bytes, which is incrementally filled. +/// A borrowed byte buffer which is incrementally filled and initialized. /// -/// This type makes it safer to work with `MaybeUninit` buffers, such as to read into a buffer -/// without having to initialize it first. It tracks the region of bytes that have been filled and -/// the region that remains uninitialized. +/// This type is a sort of "double cursor". It tracks three regions in the buffer: a region at the beginning of the +/// buffer that has been logically filled with data, a region that has been initialized at some point but not yet +/// logically filled, and a region at the end that is fully uninitialized. The filled region is guaranteed to be a +/// subset of the initialized region. /// -/// The contents of the buffer can be visualized as: +/// In summary, the contents of the buffer can be visualized as: /// ```not_rust -/// [ capacity ] -/// [ len: filled and initialized | capacity - len: uninitialized ] +/// [ capacity ] +/// [ filled | unfilled ] +/// [ initialized | uninitialized ] /// ``` /// -/// Note that `BorrowedBuf` does not distinguish between uninitialized data and data that was -/// previously initialized but no longer contains valid data. -/// -/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique -/// reference (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_len`), but -/// cannot be directly written. To write into the buffer, use `unfilled` to create a -/// `BorrowedCursor`. The cursor has write-only access to the unfilled portion of the buffer. +/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique reference +/// (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_init`), but cannot be +/// directly written. To write into the buffer, use `unfilled` to create a `BorrowedCursor`. The cursor +/// has write-only access to the unfilled portion of the buffer (you can think of it as a +/// write-only iterator). /// /// The lifetime `'data` is a bound on the lifetime of the underlying data. pub struct BorrowedBuf<'data> { @@ -29,11 +30,14 @@ pub struct BorrowedBuf<'data> { buf: &'data mut [MaybeUninit], /// The length of `self.buf` which is known to be filled. filled: usize, + /// The length of `self.buf` which is known to be initialized. + init: usize, } impl Debug for BorrowedBuf<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("BorrowedBuf") + .field("init", &self.init) .field("filled", &self.filled) .field("capacity", &self.capacity()) .finish() @@ -44,22 +48,24 @@ impl Debug for BorrowedBuf<'_> { impl<'data> From<&'data mut [u8]> for BorrowedBuf<'data> { #[inline] fn from(slice: &'data mut [u8]) -> BorrowedBuf<'data> { + let len = slice.len(); + BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized, even though it's - // already initialized. + // SAFETY: initialized data never becoming uninitialized is an invariant of BorrowedBuf buf: unsafe { (slice as *mut [u8]).as_uninit_slice_mut().unwrap() }, filled: 0, + init: len, } } } /// Creates a new `BorrowedBuf` from an uninitialized buffer. /// -/// Use `set_filled` if part of the buffer is known to be already filled. +/// Use `set_init` if part of the buffer is known to be already initialized. impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { #[inline] fn from(buf: &'data mut [MaybeUninit]) -> BorrowedBuf<'data> { - BorrowedBuf { buf, filled: 0 } + BorrowedBuf { buf, filled: 0, init: 0 } } } @@ -68,11 +74,14 @@ impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { /// Use `BorrowedCursor::with_unfilled_buf` instead for a safer alternative. impl<'data> From> for BorrowedBuf<'data> { #[inline] - fn from(buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + fn from(mut buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + let init = buf.init_mut().len(); BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized. + // SAFETY: no initialized byte is ever uninitialized as per + // `BorrowedBuf`'s invariant buf: unsafe { buf.buf.buf.get_unchecked_mut(buf.buf.filled..) }, filled: 0, + init, } } } @@ -90,6 +99,12 @@ impl<'data> BorrowedBuf<'data> { self.filled } + /// Returns the length of the initialized part of the buffer. + #[inline] + pub fn init_len(&self) -> usize { + self.init + } + /// Returns a shared reference to the filled portion of the buffer. #[inline] pub fn filled(&self) -> &[u8] { @@ -144,16 +159,33 @@ impl<'data> BorrowedBuf<'data> { /// Clears the buffer, resetting the filled region to empty. /// - /// The contents of the buffer are not modified. + /// The number of initialized bytes is not changed, and the contents of the buffer are not modified. #[inline] pub fn clear(&mut self) -> &mut Self { self.filled = 0; self } + + /// Asserts that the first `n` bytes of the buffer are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when called with fewer + /// bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` unfilled bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.init = cmp::max(self.init, n); + self + } } /// A writeable view of the unfilled portion of a [`BorrowedBuf`]. /// +/// The unfilled portion consists of an initialized and an uninitialized part; see [`BorrowedBuf`] +/// for details. +/// /// Data can be written directly to the cursor by using [`append`](BorrowedCursor::append) or /// indirectly by getting a slice of part or all of the cursor and writing into the slice. In the /// indirect case, the caller must call [`advance`](BorrowedCursor::advance) after writing to inform @@ -206,17 +238,48 @@ impl<'a> BorrowedCursor<'a> { self.buf.filled } + /// Returns a mutable reference to the initialized portion of the cursor. + #[inline] + pub fn init_mut(&mut self) -> &mut [u8] { + // SAFETY: We only slice the initialized part of the buffer, which is always valid + unsafe { + let buf = self.buf.buf.get_unchecked_mut(self.buf.filled..self.buf.init); + buf.assume_init_mut() + } + } + /// Returns a mutable reference to the whole cursor. /// /// # Safety /// - /// The caller must not uninitialize any previously initialized bytes. + /// The caller must not uninitialize any bytes in the initialized portion of the cursor. #[inline] pub unsafe fn as_mut(&mut self) -> &mut [MaybeUninit] { // SAFETY: always in bounds unsafe { self.buf.buf.get_unchecked_mut(self.buf.filled..) } } + /// Advances the cursor by asserting that `n` bytes have been filled. + /// + /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be + /// accessed via the underlying buffer. I.e., the buffer's filled portion grows by `n` elements + /// and its unfilled portion (and the capacity of this cursor) shrinks by `n` elements. + /// + /// If less than `n` bytes initialized (by the cursor's point of view), `set_init` should be + /// called first. + /// + /// # Panics + /// + /// Panics if there are less than `n` bytes initialized. + #[inline] + pub fn advance(&mut self, n: usize) -> &mut Self { + // The subtraction cannot underflow by invariant of this type. + assert!(n <= self.buf.init - self.buf.filled); + + self.buf.filled += n; + self + } + /// Advances the cursor by asserting that `n` bytes have been filled. /// /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be @@ -225,11 +288,42 @@ impl<'a> BorrowedCursor<'a> { /// /// # Safety /// - /// The caller must ensure that the first `n` bytes of the cursor have been initialized. `n` - /// must not exceed the remaining capacity of this cursor. + /// The caller must ensure that the first `n` bytes of the cursor have been properly + /// initialised. #[inline] - pub unsafe fn advance(&mut self, n: usize) -> &mut Self { + pub unsafe fn advance_unchecked(&mut self, n: usize) -> &mut Self { self.buf.filled += n; + self.buf.init = cmp::max(self.buf.init, self.buf.filled); + self + } + + /// Initializes all bytes in the cursor. + #[inline] + pub fn ensure_init(&mut self) -> &mut Self { + // SAFETY: always in bounds and we never uninitialize these bytes. + let uninit = unsafe { self.buf.buf.get_unchecked_mut(self.buf.init..) }; + + // SAFETY: 0 is a valid value for MaybeUninit and the length matches the allocation + // since it is comes from a slice reference. + unsafe { + ptr::write_bytes(uninit.as_mut_ptr(), 0, uninit.len()); + } + self.buf.init = self.buf.capacity(); + + self + } + + /// Asserts that the first `n` unfilled bytes of the cursor are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when + /// called with fewer bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.buf.init = cmp::max(self.buf.init, self.buf.filled + n); self } @@ -247,6 +341,10 @@ impl<'a> BorrowedCursor<'a> { self.as_mut()[..buf.len()].write_copy_of_slice(buf); } + // SAFETY: We just added the entire contents of buf to the filled section. + unsafe { + self.set_init(buf.len()); + } self.buf.filled += buf.len(); } @@ -269,9 +367,17 @@ impl<'a> BorrowedCursor<'a> { // there, one could mark some bytes as initialized even though there aren't. assert!(core::ptr::addr_eq(prev_ptr, buf.buf)); - // SAFETY: These bytes were filled in the `BorrowedBuf`, so they're filled in the cursor - // too, because the buffer wasn't replaced. - self.buf.filled += buf.filled; + let filled = buf.filled; + let init = buf.init; + + // Update `init` and `filled` fields with what was written to the buffer. + // `self.buf.filled` was the starting length of the `BorrowedBuf`. + // + // SAFETY: These amounts of bytes were initialized/filled in the `BorrowedBuf`, + // and therefore they are initialized/filled in the cursor too, because the + // buffer wasn't replaced. + self.buf.init = self.buf.filled + init; + self.buf.filled += filled; res } diff --git a/library/coretests/tests/io/borrowed_buf.rs b/library/coretests/tests/io/borrowed_buf.rs index 730ba04465a11..aaa98d26ff8b9 100644 --- a/library/coretests/tests/io/borrowed_buf.rs +++ b/library/coretests/tests/io/borrowed_buf.rs @@ -8,6 +8,7 @@ fn new() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } @@ -19,16 +20,27 @@ fn uninit() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 0); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } +#[test] +fn initialize_unfilled() { + let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + rbuf.unfilled().ensure_init(); + + assert_eq!(rbuf.init_len(), 16); +} + #[test] fn advance_filled() { let buf: &mut [_] = &mut [0; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(1) }; + rbuf.unfilled().advance(1); assert_eq!(rbuf.filled().len(), 1); assert_eq!(rbuf.unfilled().capacity(), 15); @@ -39,7 +51,7 @@ fn clear() { let buf: &mut [_] = &mut [255; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(16) }; + rbuf.unfilled().advance(16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.unfilled().capacity(), 0); @@ -49,9 +61,33 @@ fn clear() { assert_eq!(rbuf.filled().len(), 0); assert_eq!(rbuf.unfilled().capacity(), 16); - unsafe { rbuf.unfilled().advance(16) }; + assert_eq!(rbuf.unfilled().init_mut(), [255; 16]); +} + +#[test] +fn set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + + rbuf.unfilled().advance(4); - assert_eq!(rbuf.filled(), [255; 16]); + unsafe { + rbuf.set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); } #[test] @@ -61,6 +97,7 @@ fn append() { rbuf.unfilled().append(&[0; 8]); + assert_eq!(rbuf.init_len(), 8); assert_eq!(rbuf.filled().len(), 8); assert_eq!(rbuf.filled(), [0; 8]); @@ -68,6 +105,7 @@ fn append() { rbuf.unfilled().append(&[1; 16]); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.filled(), [1; 16]); } @@ -87,12 +125,43 @@ fn reborrow_written() { assert_eq!(cursor.written(), 32); assert_eq!(buf.unfilled().written(), 32); + assert_eq!(buf.init_len(), 32); assert_eq!(buf.filled().len(), 32); let filled = buf.filled(); assert_eq!(&filled[..16], [1; 16]); assert_eq!(&filled[16..], [2; 16]); } +#[test] +fn cursor_set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 16); + + rbuf.unfilled().advance(4); + + unsafe { + rbuf.unfilled().set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 12); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 12); +} + #[test] fn cursor_with_unfilled_buf() { let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; @@ -100,30 +169,31 @@ fn cursor_with_unfilled_buf() { let mut cursor = rbuf.unfilled(); cursor.with_unfilled_buf(|buf| { - assert_eq!(buf.capacity(), 16); buf.unfilled().append(&[1, 2, 3]); assert_eq!(buf.filled(), &[1, 2, 3]); }); + assert_eq!(cursor.init_mut().len(), 0); assert_eq!(cursor.written(), 3); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 13); + assert_eq!(buf.init_len(), 0); - unsafe { - buf.unfilled().as_mut().write_filled(0); - buf.unfilled().advance(4) - }; + buf.unfilled().ensure_init(); + buf.unfilled().advance(4); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 9); + assert_eq!(buf.init_len(), 9); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); - assert_eq!(rbuf.len(), 7); assert_eq!(rbuf.filled(), &[1, 2, 3, 0, 0, 0, 0]); } diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs index bcaafcfee787a..0a5d1153d860c 100644 --- a/library/std/src/fs/tests.rs +++ b/library/std/src/fs/tests.rs @@ -709,6 +709,8 @@ fn file_test_read_buf() { let mut file = check!(File::open(filename)); check!(file.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // File::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); check!(fs::remove_file(filename)); } diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 69c260b5410af..40441dc057d0d 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -284,6 +284,15 @@ impl BufReader { } } +// This is only used by a test which asserts that the initialization-tracking is correct. +#[cfg(test)] +impl BufReader { + #[allow(missing_docs)] + pub fn initialized(&self) -> usize { + self.buf.initialized() + } +} + impl BufReader { /// Seeks relative to the current position. If the new position lies within the buffer, /// the buffer will not be flushed, allowing for more efficient seeks. diff --git a/library/std/src/io/buffered/bufreader/buffer.rs b/library/std/src/io/buffered/bufreader/buffer.rs index 2694726b3f44f..9b600cd55758b 100644 --- a/library/std/src/io/buffered/bufreader/buffer.rs +++ b/library/std/src/io/buffered/bufreader/buffer.rs @@ -21,19 +21,25 @@ pub struct Buffer { // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are // initialized with bytes from a read. filled: usize, + // This is the max number of bytes returned across all `fill_buf` calls. We track this so that we + // can accurately tell `read_buf` how many bytes of buf are initialized, to bypass as much of its + // defensive initialization as possible. Note that while this often the same as `filled`, it + // doesn't need to be. Calls to `fill_buf` are not required to actually fill the buffer, and + // omitting this is a huge perf regression for `Read` impls that do not. + initialized: usize, } impl Buffer { #[inline] pub fn with_capacity(capacity: usize) -> Self { let buf = Box::new_uninit_slice(capacity); - Self { buf, pos: 0, filled: 0 } + Self { buf, pos: 0, filled: 0, initialized: 0 } } #[inline] pub fn try_with_capacity(capacity: usize) -> io::Result { match Box::try_new_uninit_slice(capacity) { - Ok(buf) => Ok(Self { buf, pos: 0, filled: 0 }), + Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), Err(_) => { Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer")) } @@ -62,6 +68,12 @@ impl Buffer { self.pos } + // This is only used by a test which asserts that the initialization-tracking is correct. + #[cfg(test)] + pub fn initialized(&self) -> usize { + self.initialized + } + #[inline] pub fn discard_buffer(&mut self) { self.pos = 0; @@ -98,8 +110,13 @@ impl Buffer { /// Read more bytes into the buffer without discarding any of its contents pub fn read_more(&mut self, mut reader: impl Read) -> io::Result { let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); + let old_init = self.initialized - self.filled; + unsafe { + buf.set_init(old_init); + } reader.read_buf(buf.unfilled())?; self.filled += buf.len(); + self.initialized += buf.init_len() - old_init; Ok(buf.len()) } @@ -120,10 +137,16 @@ impl Buffer { debug_assert!(self.pos == self.filled); let mut buf = BorrowedBuf::from(&mut *self.buf); + // SAFETY: `self.filled` bytes will always have been initialized. + unsafe { + buf.set_init(self.initialized); + } + let result = reader.read_buf(buf.unfilled()); self.pos = 0; self.filled = buf.len(); + self.initialized = buf.init_len(); result?; } diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index fc77b02a8e828..6ad4158b92904 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ -1052,6 +1052,30 @@ fn single_formatted_write() { assert_eq!(writer.get_ref().events, [RecordedEvent::Write("hello, world!\n".to_string())]); } +#[test] +fn bufreader_full_initialize() { + struct OneByteReader; + impl Read for OneByteReader { + fn read(&mut self, buf: &mut [u8]) -> crate::io::Result { + if buf.len() > 0 { + buf[0] = 0; + Ok(1) + } else { + Ok(0) + } + } + } + let mut reader = BufReader::new(OneByteReader); + // Nothing is initialized yet. + assert_eq!(reader.initialized(), 0); + + let buf = reader.fill_buf().unwrap(); + // We read one byte... + assert_eq!(buf.len(), 1); + // But we initialized the whole buffer! + assert_eq!(reader.initialized(), reader.capacity()); +} + /// This is a regression test for https://github.com/rust-lang/rust/issues/127584. #[test] fn bufwriter_aliasing() { diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index 8b5e7c4df4e08..2b558efb8885e 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -214,19 +214,28 @@ impl BufferedWriterSpec for BufWriter { } let mut len = 0; + let mut init = 0; loop { let buf = self.buffer_mut(); let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into(); + unsafe { + // SAFETY: init is either 0 or the init_len from the previous iteration. + read_buf.set_init(init); + } + if read_buf.capacity() >= DEFAULT_BUF_SIZE { let mut cursor = read_buf.unfilled(); match reader.read_buf(cursor.reborrow()) { Ok(()) => { let bytes_read = cursor.written(); + if bytes_read == 0 { return Ok(len); } + + init = read_buf.init_len() - bytes_read; len += bytes_read as u64; // SAFETY: BorrowedBuf guarantees all of its filled bytes are init @@ -239,6 +248,10 @@ impl BufferedWriterSpec for BufWriter { Err(e) => return Err(e), } } else { + // All the bytes that were already in the buffer are initialized, + // treat them as such when the buffer is flushed. + init += buf.len(); + self.flush_buf()?; } } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 4c064c435e5bc..b7756befa11e9 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -419,6 +419,8 @@ pub(crate) fn default_read_to_end( .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)) .unwrap_or(DEFAULT_BUF_SIZE); + let mut initialized = 0; // Extra initialized bytes from previous loop iteration + const PROBE_SIZE: usize = 32; fn small_probe_read(r: &mut R, buf: &mut Vec) -> Result { @@ -447,6 +449,8 @@ pub(crate) fn default_read_to_end( } } + let mut consecutive_short_reads = 0; + loop { if buf.len() == buf.capacity() && buf.capacity() == start_cap { // The buffer might be an exact fit. Let's read into a probe buffer @@ -470,6 +474,11 @@ pub(crate) fn default_read_to_end( spare = &mut spare[..buf_len]; let mut read_buf: BorrowedBuf<'_> = spare.into(); + // SAFETY: These bytes were initialized but not filled in the previous loop + unsafe { + read_buf.set_init(initialized); + } + let mut cursor = read_buf.unfilled(); let result = loop { match r.read_buf(cursor.reborrow()) { @@ -480,7 +489,9 @@ pub(crate) fn default_read_to_end( } }; + let unfilled_but_initialized = cursor.init_mut().len(); let bytes_read = cursor.written(); + let was_fully_initialized = read_buf.init_len() == buf_len; // SAFETY: BorrowedBuf's invariants mean this much memory is initialized. unsafe { @@ -495,8 +506,27 @@ pub(crate) fn default_read_to_end( return Ok(buf.len() - start_len); } + if bytes_read < buf_len { + consecutive_short_reads += 1; + } else { + consecutive_short_reads = 0; + } + + // store how much was initialized but not filled + initialized = unfilled_but_initialized; + // Use heuristics to determine the max read size if no initial size hint was provided if size_hint.is_none() { + // The reader is returning short reads but it doesn't call ensure_init(). + // In that case we no longer need to restrict read sizes to avoid + // initialization costs. + // When reading from disk we usually don't get any short reads except at EOF. + // So we wait for at least 2 short reads before uncapping the read buffer; + // this helps with the Windows issue. + if !was_fully_initialized && consecutive_short_reads > 1 { + max_read_size = usize::MAX; + } + // we have passed a larger buffer than previously and the // reader still hasn't returned a short read if buf_len >= max_read_size && bytes_read == buf_len { @@ -557,13 +587,8 @@ pub(crate) fn default_read_buf(read: F, mut cursor: BorrowedCursor<'_>) -> Re where F: FnOnce(&mut [u8]) -> Result, { - // SAFETY: We do not uninitialize any part of the buffer. - let n = read(unsafe { cursor.as_mut().write_filled(0) })?; - assert!(n <= cursor.capacity()); - // SAFETY: We've initialized the entire buffer, and `read` can't make it uninitialized. - unsafe { - cursor.advance(n); - } + let n = read(cursor.ensure_init().init_mut())?; + cursor.advance(n); Ok(()) } @@ -3073,21 +3098,31 @@ impl Read for Take { // The condition above guarantees that `self.limit` fits in `usize`. let limit = self.limit as usize; + let extra_init = cmp::min(limit, buf.init_mut().len()); + // SAFETY: no uninit data is written to ibuf let ibuf = unsafe { &mut buf.as_mut()[..limit] }; let mut sliced_buf: BorrowedBuf<'_> = ibuf.into(); + // SAFETY: extra_init bytes of ibuf are known to be initialized + unsafe { + sliced_buf.set_init(extra_init); + } + let mut cursor = sliced_buf.unfilled(); let result = self.inner.read_buf(cursor.reborrow()); + let new_init = cursor.init_mut().len(); let filled = sliced_buf.len(); // cursor / sliced_buf / ibuf must drop here - // SAFETY: filled bytes have been filled and therefore initialized unsafe { - buf.advance(filled); + // SAFETY: filled bytes have been filled and therefore initialized + buf.advance_unchecked(filled); + // SAFETY: new_init bytes of buf's unfilled buffer have been initialized + buf.set_init(new_init); } self.limit -= filled as u64; diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index e14e6432eafaf..b22988d4a8a9d 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -209,6 +209,15 @@ fn read_buf_exact() { assert_eq!(c.read_buf_exact(buf.unfilled()).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); } +#[test] +#[should_panic] +fn borrowed_cursor_advance_overflow() { + let mut buf = [0; 512]; + let mut buf = BorrowedBuf::from(&mut buf[..]); + buf.unfilled().advance(1); + buf.unfilled().advance(usize::MAX); +} + #[test] fn take_eof() { struct R; diff --git a/library/std/src/io/util.rs b/library/std/src/io/util.rs index a09c8bc069306..0410df3ef1a3e 100644 --- a/library/std/src/io/util.rs +++ b/library/std/src/io/util.rs @@ -283,7 +283,7 @@ impl Read for Repeat { // SAFETY: No uninit bytes are being written. unsafe { buf.as_mut() }.write_filled(self.byte); // SAFETY: the entire unfilled portion of buf has been initialized. - unsafe { buf.advance(buf.capacity()) }; + unsafe { buf.advance_unchecked(buf.capacity()) }; Ok(()) } diff --git a/library/std/src/io/util/tests.rs b/library/std/src/io/util/tests.rs index 92dbc3919bea2..d0f106d7af416 100644 --- a/library/std/src/io/util/tests.rs +++ b/library/std/src/io/util/tests.rs @@ -75,36 +75,43 @@ fn empty_reads() { let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); Read::by_ref(&mut e).read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [MaybeUninit<_>] = &mut []; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf_exact(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); @@ -113,6 +120,7 @@ fn empty_reads() { ErrorKind::UnexpectedEof, ); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let mut buf = Vec::new(); assert_eq!(e.read_to_end(&mut buf).unwrap(), 0); diff --git a/library/std/src/net/tcp/tests.rs b/library/std/src/net/tcp/tests.rs index 4787f8a1040b9..7c7ef7b2f7018 100644 --- a/library/std/src/net/tcp/tests.rs +++ b/library/std/src/net/tcp/tests.rs @@ -315,6 +315,8 @@ fn read_buf() { let mut buf = BorrowedBuf::from(buf.as_mut_slice()); t!(s.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // TcpStream::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); t.join().ok().expect("thread panicked"); }) diff --git a/library/std/src/process/tests.rs b/library/std/src/process/tests.rs index c8a83edffe427..12c5130defe5a 100644 --- a/library/std/src/process/tests.rs +++ b/library/std/src/process/tests.rs @@ -188,8 +188,10 @@ fn child_stdout_read_buf() { // ChildStdout::read_buf should omit buffer initialization. if cfg!(target_os = "windows") { assert_eq!(buf.filled(), b"abc\r\n"); + assert_eq!(buf.init_len(), 5); } else { assert_eq!(buf.filled(), b"abc\n"); + assert_eq!(buf.init_len(), 4); }; } diff --git a/library/std/src/sys/fd/hermit.rs b/library/std/src/sys/fd/hermit.rs index 28fafdaf57d8a..2666da16420c4 100644 --- a/library/std/src/sys/fd/hermit.rs +++ b/library/std/src/sys/fd/hermit.rs @@ -33,7 +33,7 @@ impl FileDesc { ) })?; // SAFETY: Exactly `result` bytes have been filled. - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } diff --git a/library/std/src/sys/fd/unix.rs b/library/std/src/sys/fd/unix.rs index c5e8646dada1e..bb6c0ac9e18e6 100644 --- a/library/std/src/sys/fd/unix.rs +++ b/library/std/src/sys/fd/unix.rs @@ -185,7 +185,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } @@ -203,7 +203,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/fs/solid.rs b/library/std/src/sys/fs/solid.rs index ec1db262855ad..f6d5d3b784d3b 100644 --- a/library/std/src/sys/fs/solid.rs +++ b/library/std/src/sys/fs/solid.rs @@ -401,7 +401,7 @@ impl File { // Safety: `num_bytes_read` bytes were written to the unfilled // portion of the buffer - cursor.advance(num_bytes_read); + cursor.advance_unchecked(num_bytes_read); Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/hermit.rs b/library/std/src/sys/net/connection/socket/hermit.rs index 8350d2b5fe4a0..c32f8dcc8de86 100644 --- a/library/std/src/sys/net/connection/socket/hermit.rs +++ b/library/std/src/sys/net/connection/socket/hermit.rs @@ -143,7 +143,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/solid.rs b/library/std/src/sys/net/connection/socket/solid.rs index ac06cdc00c8f0..673d75046d3f2 100644 --- a/library/std/src/sys/net/connection/socket/solid.rs +++ b/library/std/src/sys/net/connection/socket/solid.rs @@ -190,7 +190,7 @@ impl Socket { netc::recv(self.as_raw_fd(), buf.as_mut().as_mut_ptr().cast(), buf.capacity(), flags) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/unix.rs b/library/std/src/sys/net/connection/socket/unix.rs index 323d6214347e7..6d06a8d86bf16 100644 --- a/library/std/src/sys/net/connection/socket/unix.rs +++ b/library/std/src/sys/net/connection/socket/unix.rs @@ -294,7 +294,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/windows.rs b/library/std/src/sys/net/connection/socket/windows.rs index 4da51d78ea69b..7355f0ce6bf5e 100644 --- a/library/std/src/sys/net/connection/socket/windows.rs +++ b/library/std/src/sys/net/connection/socket/windows.rs @@ -243,7 +243,7 @@ impl Socket { } } _ => { - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } } diff --git a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs index f1e4a5a42577a..5041770faf661 100644 --- a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs @@ -46,7 +46,7 @@ pub fn read_buf(fd: Fd, mut buf: BorrowedCursor<'_>) -> IoResult<()> { let mut userbuf = alloc::User::<[u8]>::uninitialized(buf.capacity()); let len = raw::read(fd, userbuf.as_mut_ptr().cast(), userbuf.len()).from_sgx_result()?; userbuf[..len].copy_to_enclave(&mut buf.as_mut()[..len]); - buf.advance(len); + buf.advance_unchecked(len); Ok(()) } } diff --git a/library/std/src/sys/pal/windows/handle.rs b/library/std/src/sys/pal/windows/handle.rs index ffa8507831acf..90e243e1aa038 100644 --- a/library/std/src/sys/pal/windows/handle.rs +++ b/library/std/src/sys/pal/windows/handle.rs @@ -117,7 +117,7 @@ impl Handle { Ok(read) => { // Safety: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } @@ -140,7 +140,7 @@ impl Handle { // SAFETY: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } diff --git a/library/std/src/sys/process/windows/child_pipe.rs b/library/std/src/sys/process/windows/child_pipe.rs index b848435ac275f..da7a86ca072e3 100644 --- a/library/std/src/sys/process/windows/child_pipe.rs +++ b/library/std/src/sys/process/windows/child_pipe.rs @@ -260,7 +260,7 @@ impl ChildPipe { Err(e) => Err(e), Ok(n) => { unsafe { - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) } diff --git a/library/std/src/sys/stdio/zkvm.rs b/library/std/src/sys/stdio/zkvm.rs index 84496ac937363..f31c6c26e87cd 100644 --- a/library/std/src/sys/stdio/zkvm.rs +++ b/library/std/src/sys/stdio/zkvm.rs @@ -19,7 +19,7 @@ impl io::Read for Stdin { fn read_buf(&mut self, mut buf: BorrowedCursor<'_>) -> io::Result<()> { unsafe { let n = abi::sys_read(fileno::STDIN, buf.as_mut().as_mut_ptr().cast(), buf.capacity()); - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) }