diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 742f0b42e2bea7..2ab20b0599c0b4 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -23,7 +23,7 @@ use { StoredAccountsInfo, }, accounts_hash::AccountHash, - buffered_reader::{BufferedReader, Stack}, + buffered_reader::{BufferedReader, ContiguousBufFileRead, Stack}, file_io::read_into_buffer, is_zero_lamport::IsZeroLamport, storable_accounts::StorableAccounts, @@ -1049,19 +1049,17 @@ impl AppendVec { {} } AppendVecFileBacking::File(file) => { - let self_len = self.len(); const BUFFER_SIZE: usize = PAGE_SIZE * 8; - let mut reader = BufferedReader::>::new_stack( - self_len, - file, - STORE_META_OVERHEAD, - ); + let mut reader = BufferedReader::>::new_stack(self.len(), file); + let mut min_buf_len = STORE_META_OVERHEAD; // Buffer for account data that doesn't fit within the stack allocated buffer. // This will be re-used for each account that doesn't fit within the stack allocated buffer. let mut data_overflow_buffer = vec![]; loop { - let offset = reader.get_offset(); - let bytes = match reader.fill_buf() { + let offset = reader.get_file_offset(); + let bytes = match reader + .fill_buf_required_or_overflow(min_buf_len, &mut data_overflow_buffer) + { Ok([]) => break, Ok(bytes) => ValidSlice::new(bytes), Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => break, @@ -1087,53 +1085,26 @@ impl AppendVec { }; callback(account); reader.consume(stored_size); - } else if STORE_META_OVERHEAD + data_len <= BUFFER_SIZE { - reader.set_required_data_len(STORE_META_OVERHEAD + data_len); + // restore default required buffer size + min_buf_len = STORE_META_OVERHEAD; } else { - const MAX_CAPACITY: usize = MAX_PERMITTED_DATA_LENGTH as usize; - // 128KiB covers a reasonably large distribution of typical account sizes. - // In a recent sample, 99.98% of accounts' data lengths were less than or equal to 128KiB. - const MIN_CAPACITY: usize = 1024 * 128; - let capacity = data_overflow_buffer.capacity(); - if data_len > capacity { - let next_cap = data_len - .next_power_of_two() - .clamp(MIN_CAPACITY, MAX_CAPACITY); - data_overflow_buffer.reserve_exact(next_cap - capacity); - // SAFETY: We only write to the uninitialized portion of the buffer via `copy_from_slice` and `read_into_buffer`. - // Later, we ensure we only read from the initialized portion of the buffer. - unsafe { - data_overflow_buffer.set_len(next_cap); + // repeat loop with required buffer size holding whole account data + min_buf_len = STORE_META_OVERHEAD + data_len; + + if min_buf_len > BUFFER_SIZE { + const MAX_CAPACITY: usize = + STORE_META_OVERHEAD + MAX_PERMITTED_DATA_LENGTH as usize; + // 128KiB covers a reasonably large distribution of typical account sizes. + // In a recent sample, 99.98% of accounts' data lengths were less than or equal to 128KiB. + const MIN_CAPACITY: usize = 1024 * 128; + if min_buf_len > data_overflow_buffer.capacity() { + let next_cap = min_buf_len + .next_power_of_two() + .clamp(MIN_CAPACITY, MAX_CAPACITY); + data_overflow_buffer + .reserve_exact(next_cap - data_overflow_buffer.len()); } } - - // Copy already read data to overflow buffer. - data_overflow_buffer[..leftover].copy_from_slice(&bytes.0[next..]); - - // Read remaining data into overflow buffer. - let Ok(bytes_read) = read_into_buffer( - file, - self_len, - offset + next + leftover, - &mut data_overflow_buffer[leftover..data_len], - ) else { - break; - }; - if bytes_read + leftover < data_len { - break; - } - let data = &data_overflow_buffer[..data_len]; - let stored_size = aligned_stored_size(data_len); - let account = StoredAccountMeta { - meta, - account_meta, - data, - offset, - stored_size, - hash, - }; - callback(account); - reader.consume(stored_size); } } } @@ -1252,14 +1223,12 @@ impl AppendVec { AppendVecFileBacking::File(file) => { // Heuristic observed in benchmarking that maintains a reasonable balance between syscalls and data waste const BUFFER_SIZE: usize = PAGE_SIZE * 4; - let mut reader = BufferedReader::>::new_stack( - self_len, - file, - mem::size_of::() + mem::size_of::(), - ); + let mut reader = BufferedReader::>::new_stack(self_len, file); + const REQUIRED_READ_LEN: usize = + mem::size_of::() + mem::size_of::(); loop { - let offset = reader.get_offset(); - let bytes = match reader.fill_buf() { + let offset = reader.get_file_offset(); + let bytes = match reader.fill_buf_required(REQUIRED_READ_LEN) { Ok([]) => break, Ok(bytes) => ValidSlice::new(bytes), Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => break, @@ -1703,12 +1672,17 @@ pub mod tests { let mut test_accounts = Vec::with_capacity(num_accounts); let mut file_size = 0; + let special_file_interval = num_accounts / 8; for i in 0..num_accounts { let data_len = match i { - // ensure one max size account - 0 => MAX_PERMITTED_DATA_LENGTH as usize, - // ensure one 64KiB account - x if x == num_accounts - 1 => 1 << 16, + // Create several spread out accounts with varying sizes: + // for (x / special_file_interval) in 0..7 range + x if x % special_file_interval == 0 => { + // mult increases in 0 to 3 range twice + let mult = (x / special_file_interval) % 4; + // and data_len goes over 0..MAX_PERMITTED_DATA_LENGTH range also twice + mult * (MAX_PERMITTED_DATA_LENGTH as usize) / 3 + } // Otherwise use a reasonably small account to avoid long test times x => x % 256, }; diff --git a/accounts-db/src/buffered_reader.rs b/accounts-db/src/buffered_reader.rs index d1a649a762d067..e038f700b782a5 100644 --- a/accounts-db/src/buffered_reader.rs +++ b/accounts-db/src/buffered_reader.rs @@ -9,7 +9,7 @@ //! `set_required_data_len(len)`, the whole account data is buffered _linearly_ in memory and available to //! be returned. use { - crate::file_io::read_more_buffer, + crate::file_io::{read_into_buffer, read_more_buffer}, std::{ fs::File, io::{self, BufRead, BufReader}, @@ -26,6 +26,7 @@ use { /// caller may be able to opt for a stack-allocated buffer rather than a heap-allocated buffer, or /// vice versa. pub(crate) trait Backing { + fn capacity(&self) -> usize; unsafe fn as_slice(&self) -> &[u8]; unsafe fn as_mut_slice(&mut self) -> &mut [u8]; } @@ -46,6 +47,10 @@ impl Stack { } impl Backing for Stack { + fn capacity(&self) -> usize { + N + } + #[inline(always)] unsafe fn as_slice(&self) -> &[u8] { slice::from_raw_parts(self.0.as_ptr() as *const u8, N) @@ -57,6 +62,48 @@ impl Backing for Stack { } } +/// An extension of the `BufRead` trait for file readers that require stronger control +/// over returned buffer size and tracking of the file offset. +/// +/// Unlike the standard `fill_buf`, which only guarantees a non-empty buffer, +/// this trait allows callers to: +/// - Enforce a minimum number of contiguous bytes to be made available. +/// - Fall back to an overflow buffer if the internal buffer cannot satisfy the request. +/// - Retrieve the current file offset corresponding to the start of the next buffer. +pub(crate) trait ContiguousBufFileRead<'a>: BufRead { + /// Returns the current file offset corresponding to the start of the buffer + /// that will be returned by the next call to `fill_buf_*`. + /// + /// This offset represents the position within the underlying file where data + /// will be consumed from. + fn get_file_offset(&self) -> usize; + + /// Ensures the internal buffer contains at least `required_len` contiguous bytes, + /// and returns a slice to that buffer. + /// + /// Returns `Err(io::ErrorKind::UnexpectedEof)` if the end of file is reached + /// before the required number of bytes is available. + fn fill_buf_required(&mut self, required_len: usize) -> io::Result<&[u8]>; + + /// Attempts to provide at least `required_len` contiguous bytes by using + /// the internal buffer or the provided `overflow_buffer` if needed. + /// + /// If the internal buffer alone does not satisfy the requirement, additional + /// bytes are read and appended to `overflow_buffer`, which is resized to fit the data. + /// + /// Returns a slice containing all the required data (may point to either buffer). + /// + /// Returns `Err(io::ErrorKind::UnexpectedEof)` if the end of file is reached + /// before the required number of bytes can be read. + fn fill_buf_required_or_overflow<'b>( + &'b mut self, + required_len: usize, + overflow_buffer: &'b mut Vec, + ) -> io::Result<&'b [u8]> + where + 'a: 'b; +} + /// read a file a large buffer at a time and provide access to a slice in that buffer pub struct BufferedReader<'a, T> { /// when we are next asked to read from file, start at this offset @@ -67,105 +114,126 @@ pub struct BufferedReader<'a, T> { buf_valid_bytes: Range, /// offset in the file of the `buf_valid_bytes`.`start` file_last_offset: usize, - /// how many contiguous bytes caller needs - read_requirements: Option, /// how many bytes are valid in the file. The file's len may be longer. file_len_valid: usize, /// reference to file handle file: &'a File, - /// we always want at least this many contiguous bytes available or we must read more into the buffer. - default_min_read_requirement: usize, } impl<'a, T> BufferedReader<'a, T> { /// `buffer_size`: how much to try to read at a time /// `file_len_valid`: # bytes that are valid in the file, may be less than overall file len /// `default_min_read_requirement`: make sure we always have this much data available if we're asked to read - pub fn new( - backing: T, - file_len_valid: usize, - file: &'a File, - default_min_read_requirement: usize, - ) -> Self { + pub fn new(backing: T, file_len_valid: usize, file: &'a File) -> Self { Self { file_offset_of_next_read: 0, buf: backing, buf_valid_bytes: 0..0, file_last_offset: 0, - read_requirements: None, file_len_valid, file, - default_min_read_requirement, } } +} - /// specify the amount of data required to read next time `read` is called +impl<'a, T: Backing> ContiguousBufFileRead<'a> for BufferedReader<'a, T> { #[inline(always)] - pub fn set_required_data_len(&mut self, len: usize) { - self.read_requirements = Some(len); + fn get_file_offset(&self) -> usize { + if self.buf_valid_bytes.is_empty() { + self.file_offset_of_next_read + } else { + self.file_last_offset + self.buf_valid_bytes.start + } } -} -impl<'a, T> BufferedReader<'a, T> -where - T: Backing, -{ - /// read to make sure we have the minimum amount of data - fn read_required_bytes(&mut self) -> io::Result<()> { - let must_read = self - .read_requirements - .unwrap_or(self.default_min_read_requirement); - if self.buf_valid_bytes.len() < must_read { - // we haven't used all the bytes we read last time, so adjust the effective offset - debug_assert!(self.buf_valid_bytes.len() <= self.file_offset_of_next_read); - self.file_last_offset = self.file_offset_of_next_read - self.buf_valid_bytes.len(); - read_more_buffer( - self.file, - self.file_len_valid, - &mut self.file_offset_of_next_read, - // SAFETY: `read_more_buffer` will only _write_ to uninitialized memory and lifetime is tied to self. - unsafe { self.buf.as_mut_slice() }, - &mut self.buf_valid_bytes, - )?; - if self.buf_valid_bytes.len() < must_read { + fn fill_buf_required(&mut self, required_len: usize) -> io::Result<&[u8]> { + if self.buf_valid_bytes.len() < required_len { + self.read_more_bytes()?; + if self.buf_valid_bytes.len() < required_len { return Err(io::Error::new( io::ErrorKind::UnexpectedEof, "unable to read enough data", )); } } - // reset this once we have checked that we had this much data once - self.read_requirements = None; - Ok(()) + Ok(self.valid_slice()) } - /// Return file offset within `file` of the current consume position. - /// - /// The offset is corresponding to the start of buffer that will be returned - /// by the next `fill_buf` call. - #[inline(always)] - pub fn get_offset(&'a self) -> usize { - if self.buf_valid_bytes.is_empty() { - self.file_offset_of_next_read - } else { - self.file_last_offset + self.buf_valid_bytes.start + fn fill_buf_required_or_overflow<'b>( + &'b mut self, + required_len: usize, + overflow_buffer: &'b mut Vec, + ) -> io::Result<&'b [u8]> + where + 'a: 'b, + { + if required_len <= self.buf.capacity() { + return self.fill_buf_required(required_len); + } + + if required_len > overflow_buffer.capacity() { + overflow_buffer.reserve_exact(required_len - overflow_buffer.len()); + } + // SAFETY: We only write to the uninitialized portion of the buffer via `copy_from_slice` and `read_into_buffer`. + // Later, we ensure we only read from the initialized portion of the buffer. + unsafe { + overflow_buffer.set_len(required_len); + } + + // Copy already read data to overflow buffer. + let available_valid_data = self.valid_slice(); + let leftover = available_valid_data.len(); + overflow_buffer[..leftover].copy_from_slice(available_valid_data); + + // Read remaining data into overflow buffer. + let read_dst = &mut overflow_buffer[leftover..]; + let bytes_read = read_into_buffer( + self.file, + self.file_len_valid, + self.file_offset_of_next_read, + read_dst, + )?; + if bytes_read < read_dst.len() { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unable to read required amount of data", + )); } + Ok(overflow_buffer.as_slice()) + } +} + +impl BufferedReader<'_, T> +where + T: Backing, +{ + /// Defragment buffer and read more bytes to make sure we have filled available + /// space as much as possible. + fn read_more_bytes(&mut self) -> io::Result<()> { + // we haven't used all the bytes we read last time, so adjust the effective offset + debug_assert!(self.buf_valid_bytes.len() <= self.file_offset_of_next_read); + self.file_last_offset = self.file_offset_of_next_read - self.buf_valid_bytes.len(); + read_more_buffer( + self.file, + self.file_len_valid, + &mut self.file_offset_of_next_read, + // SAFETY: `read_more_buffer` will only _write_ to uninitialized memory and lifetime is tied to self. + unsafe { self.buf.as_mut_slice() }, + &mut self.buf_valid_bytes, + ) + } + + fn valid_slice(&self) -> &[u8] { + // SAFETY: We only read from memory that has been initialized by `read_more_buffer` + // and lifetime is tied to self. + unsafe { &self.buf.as_slice()[self.buf_valid_bytes.clone()] } } } impl<'a, const N: usize> BufferedReader<'a, Stack> { /// create a new buffered reader with a stack-allocated buffer - pub fn new_stack( - file_len_valid: usize, - file: &'a File, - default_min_read_requirement: usize, - ) -> Self { - BufferedReader::new( - Stack::new(), - file_len_valid, - file, - default_min_read_requirement, - ) + pub fn new_stack(file_len_valid: usize, file: &'a File) -> Self { + BufferedReader::new(Stack::new(), file_len_valid, file) } } @@ -185,18 +253,11 @@ impl io::Read for BufferedReader<'_, T> { /// `BufferedReader` implements a more permissive API compared to `BufRead` /// by allowing `consume` to advance beyond the end of the buffer returned by `fill_buf`. impl BufRead for BufferedReader<'_, T> { - /// Return the biggest slice of valid data starting at the current offset. - /// - /// Note that `fill_buf` has stronger guarantee than `BufRead::fill_buf` and returns - /// at least the number of bytes requested by `default_min_read_requirement` and - /// `set_required_data_len`. If that condition cannot be met - /// `Err(io::ErrorKind::UnexpectedEof)` is returned. fn fill_buf(&mut self) -> io::Result<&[u8]> { - self.read_required_bytes()?; - - // SAFETY: We only read from memory that has been initialized by `read_more_buffer` - // and lifetime is tied to self. - Ok(unsafe { &self.buf.as_slice()[self.buf_valid_bytes.clone()] }) + if self.buf_valid_bytes.is_empty() { + self.read_more_bytes()?; + } + Ok(self.valid_slice()) } /// Advance the offset by `amt` to a `file` position where next `fill_buf` buffer should @@ -225,7 +286,7 @@ pub fn large_file_buf_reader( if agave_io_uring::io_uring_supported() { use crate::io_uring::sequential_file_reader::SequentialFileReader; - let io_uring_reader = SequentialFileReader::with_capacity(buf_size, path.as_ref()); + let io_uring_reader = SequentialFileReader::with_capacity(buf_size, &path); match io_uring_reader { Ok(reader) => return Ok(Box::new(reader)), Err(error) => { @@ -262,10 +323,9 @@ mod tests { // First read 16 bytes to fill buffer let file_len_valid = 32; let default_min_read = 8; - let mut reader = - BufferedReader::new(backing, file_len_valid, &sample_file, default_min_read); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let mut reader = BufferedReader::new(backing, file_len_valid, &sample_file); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(default_min_read).unwrap()); let mut expected_offset = 0; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), buffer_size); @@ -275,31 +335,34 @@ mod tests { let advance = 16; let mut required_len = 32; reader.consume(advance); - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); // Continue reading should yield EOF. reader.consume(advance); - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); // set_required_data to zero and offset should not change, and slice should be empty. required_len = 0; - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(required_len).unwrap()); let expected_offset = file_len_valid; assert_eq!(offset, expected_offset); let expected_slice_len = 0; @@ -319,10 +382,9 @@ mod tests { // First read 16 bytes to fill buffer let default_min_read_size = 8; - let mut reader = - BufferedReader::new(backing, valid_len, &sample_file, default_min_read_size); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let mut reader = BufferedReader::new(backing, valid_len, &sample_file); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(default_min_read_size).unwrap()); let mut expected_offset = 0; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), buffer_size); @@ -332,12 +394,14 @@ mod tests { let mut advance = 16; let mut required_data_len = 32; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_data_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); @@ -345,12 +409,14 @@ mod tests { advance = 14; required_data_len = 32; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_data_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); @@ -358,12 +424,14 @@ mod tests { advance = 1; required_data_len = 8; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_data_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); @@ -371,12 +439,14 @@ mod tests { advance = 3; required_data_len = 8; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("Should hit EOF").kind(), + reader + .fill_buf_required(required_data_len) + .expect_err("Should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); } @@ -392,10 +462,9 @@ mod tests { // First read 16 bytes to fill buffer let file_len_valid = 32; let default_min_read_size = 8; - let mut reader = - BufferedReader::new(backing, file_len_valid, &sample_file, default_min_read_size); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let mut reader = BufferedReader::new(backing, file_len_valid, &sample_file); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(default_min_read_size).unwrap()); let mut expected_offset = 0; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), buffer_size); @@ -405,9 +474,8 @@ mod tests { let mut advance = 8; let mut required_len = 8; reader.consume(advance); - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(required_len).unwrap()); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), required_len); @@ -420,9 +488,8 @@ mod tests { advance = 8; required_len = 16; reader.consume(advance); - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(required_len).unwrap()); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), required_len); @@ -435,12 +502,14 @@ mod tests { advance = 16; required_len = 32; reader.consume(advance); - reader.set_required_data_len(required_len); - let offset = reader.get_offset(); + let offset = reader.get_file_offset(); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!( - reader.fill_buf().expect_err("should hit EOF").kind(), + reader + .fill_buf_required(required_len) + .expect_err("should hit EOF") + .kind(), io::ErrorKind::UnexpectedEof ); } @@ -456,9 +525,9 @@ mod tests { // First read 16 bytes to fill buffer let valid_len = 32; let default_min_read = 8; - let mut reader = BufferedReader::new(backing, valid_len, &sample_file, default_min_read); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let mut reader = BufferedReader::new(backing, valid_len, &sample_file); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(default_min_read).unwrap()); let mut expected_offset = 0; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), buffer_size); @@ -469,9 +538,8 @@ mod tests { let mut advance = 8; let mut required_data_len = 16; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(required_data_len).unwrap()); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), required_data_len); @@ -484,9 +552,8 @@ mod tests { advance = 16; required_data_len = 8; reader.consume(advance); - reader.set_required_data_len(required_data_len); - let offset = reader.get_offset(); - let slice = ValidSlice::new(reader.fill_buf().unwrap()); + let offset = reader.get_file_offset(); + let slice = ValidSlice::new(reader.fill_buf_required(required_data_len).unwrap()); expected_offset += advance; assert_eq!(offset, expected_offset); assert_eq!(slice.len(), required_data_len); @@ -495,4 +562,60 @@ mod tests { &bytes[expected_offset..expected_offset + required_data_len] ); } + + #[test_case(Stack::<16>::new(), 16)] + fn test_fill_buf_required_or_overflow(backing: impl Backing, buffer_size: usize) { + // Setup a sample file with 32 bytes of data + const FILE_SIZE: usize = 32; + let mut sample_file = tempfile().unwrap(); + let bytes = rand_bytes::(); + sample_file.write_all(&bytes).unwrap(); + + let file_len_valid = 32; + let mut reader = BufferedReader::new(backing, file_len_valid, &sample_file); + + // Case 1: required_len <= buffer_size (no overflow needed) + let mut overflow = Vec::new(); + let required_len = 8; + let slice = reader + .fill_buf_required_or_overflow(required_len, &mut overflow) + .unwrap(); + assert_eq!(&slice[..required_len], &bytes[..required_len]); + assert!(overflow.is_empty()); + + // Consume part of the buffer to simulate partial reading + reader.consume(required_len); + + // Case 2: required_len > buffer_size (overflow required) + let mut overflow = Vec::new(); + let required_len = buffer_size + 8; + let slice = reader + .fill_buf_required_or_overflow(required_len, &mut overflow) + .unwrap(); + + // Internal buffer is size `buffer_size`, overflow should extend with the remaining `8` bytes + assert_eq!(slice.len(), required_len); + assert_eq!(slice, &bytes[8..8 + required_len]); + assert_eq!(overflow.len(), required_len); + + // Consume everything to reach EOF + reader.consume(required_len); + + // Case 3: required_len larger than remaining data (expect UnexpectedEof) + let mut overflow = Vec::new(); + let required_len = 64; + let result = reader.fill_buf_required_or_overflow(required_len, &mut overflow); + assert_eq!(result.unwrap_err().kind(), io::ErrorKind::UnexpectedEof); + + // Case 4: required_len = 0 (should return empty slice) + let mut overflow = Vec::new(); + let required_len = 0; + let offset_before = reader.get_file_offset(); + let slice = reader + .fill_buf_required_or_overflow(required_len, &mut overflow) + .unwrap(); + assert_eq!(slice.len(), 0); + let offset_after = reader.get_file_offset(); + assert_eq!(offset_before, offset_after); + } }