Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 64 additions & 22 deletions src/uu/head/src/head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,16 +350,10 @@ fn read_but_last_n_lines(
/// Return the index in `input` just after the `n`th line from the end.
///
/// If `n` exceeds the number of lines in this file, then return 0.
///
/// The cursor must be at the start of the seekable input before
/// calling this function. This function rewinds the cursor to the
/// This function rewinds the cursor to the
/// beginning of the input just before returning unless there is an
/// I/O error.
///
/// If `zeroed` is `false`, interpret the newline character `b'\n'` as
/// a line ending. If `zeroed` is `true`, interpret the null character
/// `b'\0'` as a line ending instead.
///
/// # Errors
///
/// This function returns an error if there is a problem seeking
Expand Down Expand Up @@ -390,20 +384,21 @@ fn find_nth_line_from_end<R>(input: &mut R, n: u64, separator: u8) -> std::io::R
where
R: Read + Seek,
{
let size = input.seek(SeekFrom::End(0))?;
let file_size = input.seek(SeekFrom::End(0))?;

let mut buffer = [0u8; BUF_SIZE];
let buf_size: usize = (BUF_SIZE as u64).min(size).try_into().unwrap();
let buffer = &mut buffer[..buf_size];

let mut i = 0u64;
let mut lines = 0u64;

loop {
// the casts here are ok, `buffer.len()` should never be above a few k
input.seek(SeekFrom::Current(
-((buffer.len() as i64).min((size - i) as i64)),
))?;
let bytes_remaining_to_search = file_size - i;
let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap());
let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];

input.seek(SeekFrom::Start(read_start_offset))?;
input.read_exact(buffer)?;
for byte in buffer.iter().rev() {
if byte == &separator {
Expand All @@ -412,11 +407,11 @@ where
// if it were just `n`,
if lines == n + 1 {
input.rewind()?;
return Ok(size - i);
return Ok(file_size - i);
}
i += 1;
}
if size - i == 0 {
if file_size - i == 0 {
input.rewind()?;
return Ok(0);
}
Expand Down Expand Up @@ -700,12 +695,59 @@ mod tests {

#[test]
fn test_find_nth_line_from_end() {
let mut input = Cursor::new("x\ny\nz\n");
assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 6);
assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 4);
assert_eq!(find_nth_line_from_end(&mut input, 2, b'\n').unwrap(), 2);
assert_eq!(find_nth_line_from_end(&mut input, 3, b'\n').unwrap(), 0);
assert_eq!(find_nth_line_from_end(&mut input, 4, b'\n').unwrap(), 0);
assert_eq!(find_nth_line_from_end(&mut input, 1000, b'\n').unwrap(), 0);
// Make sure our input buffer is several multiples of BUF_SIZE in size
// such that we can be reasonably confident we've exercised all logic paths.
// Make the contents of the buffer look like...
// aaaa\n
// aaaa\n
// aaaa\n
// aaaa\n
// aaaa\n
// ...
// This will make it easier to validate the results since each line will have
// 5 bytes in it.

let minimum_buffer_size = BUF_SIZE * 4;
let mut input_buffer = vec![];
let mut loop_iteration: u64 = 0;
while input_buffer.len() < minimum_buffer_size {
for _n in 0..4 {
input_buffer.push(b'a');
}
loop_iteration += 1;
input_buffer.push(b'\n');
}

let lines_in_input_file = loop_iteration;
let input_length = lines_in_input_file * 5;
assert_eq!(input_length, input_buffer.len().try_into().unwrap());
let mut input = Cursor::new(input_buffer);
// We now have loop_iteration lines in the buffer Now walk backwards through the buffer
// to confirm everything parses correctly.
// Use a large step size to prevent the test from taking too long, but don't use a power
// of 2 in case we miss some corner case.
let step_size = 511;
for n in (0..lines_in_input_file).filter(|v| v % step_size == 0) {
// The 5*n comes from 5-bytes per row.
assert_eq!(
find_nth_line_from_end(&mut input, n, b'\n').unwrap(),
input_length - 5 * n
);
}

// Now confirm that if we query with a value >= lines_in_input_file we get an offset
// of 0
assert_eq!(
find_nth_line_from_end(&mut input, lines_in_input_file, b'\n').unwrap(),
0
);
assert_eq!(
find_nth_line_from_end(&mut input, lines_in_input_file + 1, b'\n').unwrap(),
0
);
assert_eq!(
find_nth_line_from_end(&mut input, lines_in_input_file + 1000, b'\n').unwrap(),
0
);
}
}
40 changes: 40 additions & 0 deletions tests/by-util/test_head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,46 @@ fn test_presume_input_pipe_5_chars() {
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
}

#[test]
fn test_read_backwards_lines_large_file() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, I would prefer to create these files on the fly, no need to add big files in the tree :)
and I guess you can reproduce it with only 1 => 50 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the test code to generate the files on the fly. Files need to be pretty big since the bug is only seen for backward searches >128K (i.e. double the BUF_SIZE value in head.rs)

// Create our fixtures on the fly. We need the input file to be at least double
// the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger
// than that.
let scene = TestScenario::new(util_name!());
let fixtures = &scene.fixtures;
let seq_30000_file_name = "seq_30000";
let seq_1000_file_name = "seq_1000";
scene
.cmd("seq")
.arg("30000")
.set_stdout(fixtures.make_file(seq_30000_file_name))
.succeeds();
scene
.cmd("seq")
.arg("1000")
.set_stdout(fixtures.make_file(seq_1000_file_name))
.succeeds();

// Now run our tests.
scene
.ucmd()
.args(&["-n", "-29000", "seq_30000"])
.succeeds()
.stdout_is_fixture("seq_1000");

scene
.ucmd()
.args(&["-n", "-30000", "seq_30000"])
.run()
.stdout_is_fixture("emptyfile.txt");

scene
.ucmd()
.args(&["-n", "-30001", "seq_30000"])
.run()
.stdout_is_fixture("emptyfile.txt");
}

#[cfg(all(
not(target_os = "windows"),
not(target_os = "macos"),
Expand Down
Loading