-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
cat: Performance improvement when printing line numbers #7645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,64 @@ mod splice; | |
| const USAGE: &str = help_usage!("cat.md"); | ||
| const ABOUT: &str = help_about!("cat.md"); | ||
|
|
||
| struct LineNumber { | ||
| buf: Vec<u8>, | ||
| } | ||
|
|
||
| // Logic to store a string for the line number. Manually incrementing the value | ||
| // represented in a buffer like this is significantly faster than storing | ||
| // a `usize` and using the standard Rust formatting macros to format a `usize` | ||
| // to a string each time it's needed. | ||
| // String is initialized to " 1\t" and incremented each time `increment` is | ||
| // called. When the value overflows the range storable in the buffer, a b'1' is | ||
| // prepended and the counting continues. | ||
| impl LineNumber { | ||
| fn new() -> Self { | ||
| LineNumber { | ||
| // Initialize buf to b" 1\t" | ||
| buf: Vec::from(b" 1\t"), | ||
| } | ||
| } | ||
|
|
||
| fn increment(&mut self) { | ||
| // skip(1) to avoid the \t in the last byte. | ||
| for ascii_digit in self.buf.iter_mut().rev().skip(1) { | ||
| // Working from the least-significant digit, increment the number in the buffer. | ||
| // If we hit anything other than a b'9' we can break since the next digit is | ||
| // unaffected. | ||
| // Also note that if we hit a b' ', we can think of that as a 0 and increment to b'1'. | ||
| // If/else here is faster than match (as measured with some benchmarking Apr-2025), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interesting, the assembly is indeed a bit different in main here: |
||
| // probably since we can prioritize most likely digits first. | ||
| if (b'0'..=b'8').contains(ascii_digit) { | ||
| *ascii_digit += 1; | ||
| break; | ||
| } else if b'9' == *ascii_digit { | ||
| *ascii_digit = b'0'; | ||
| } else { | ||
| assert_eq!(*ascii_digit, b' '); | ||
| *ascii_digit = b'1'; | ||
| break; | ||
| } | ||
| } | ||
| if self.buf[0] == b'0' { | ||
| // This implies we've overflowed. In this case the buffer will be | ||
| // [b'0', b'0', ..., b'0', b'\t']. | ||
| // For debugging, the following logic would assert that to be the case. | ||
| // assert_eq!(*self.buf.last().unwrap(), b'\t'); | ||
| // for ascii_digit in self.buf.iter_mut().rev().skip(1) { | ||
| // assert_eq!(*ascii_digit, b'0'); | ||
| // } | ||
|
|
||
| // All we need to do is prepend a b'1' and we're good. | ||
| self.buf.insert(0, b'1'); | ||
| } | ||
| } | ||
|
|
||
| fn write(&self, writer: &mut impl Write) -> std::io::Result<()> { | ||
| writer.write_all(&self.buf) | ||
| } | ||
| } | ||
|
|
||
| #[derive(Error, Debug)] | ||
| enum CatError { | ||
| /// Wrapper around `io::Error` | ||
|
|
@@ -106,7 +164,7 @@ impl OutputOptions { | |
| /// when we can't write fast. | ||
| struct OutputState { | ||
| /// The current line number | ||
| line_number: usize, | ||
| line_number: LineNumber, | ||
|
|
||
| /// Whether the output cursor is at the beginning of a new line | ||
| at_line_start: bool, | ||
|
|
@@ -390,7 +448,7 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> { | |
| let out_info = FileInformation::from_file(&std::io::stdout()).ok(); | ||
|
|
||
| let mut state = OutputState { | ||
| line_number: 1, | ||
| line_number: LineNumber::new(), | ||
| at_line_start: true, | ||
| skipped_carriage_return: false, | ||
| one_blank_kept: false, | ||
|
|
@@ -529,8 +587,8 @@ fn write_lines<R: FdReadable>( | |
| } | ||
| state.one_blank_kept = false; | ||
| if state.at_line_start && options.number != NumberingMode::None { | ||
| write!(writer, "{0:6}\t", state.line_number)?; | ||
| state.line_number += 1; | ||
| state.line_number.write(&mut writer)?; | ||
| state.line_number.increment(); | ||
| } | ||
|
|
||
| // print to end of line or end of buffer | ||
|
|
@@ -589,8 +647,8 @@ fn write_new_line<W: Write>( | |
| if !state.at_line_start || !options.squeeze_blank || !state.one_blank_kept { | ||
| state.one_blank_kept = true; | ||
| if state.at_line_start && options.number == NumberingMode::All { | ||
| write!(writer, "{0:6}\t", state.line_number)?; | ||
| state.line_number += 1; | ||
| state.line_number.write(writer)?; | ||
| state.line_number.increment(); | ||
| } | ||
| write_end_of_line(writer, options.end_of_line().as_bytes(), is_interactive)?; | ||
| } | ||
|
|
@@ -743,4 +801,25 @@ mod tests { | |
| assert_eq!(writer.buffer(), [b'^', byte + 64]); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_incrementing_string() { | ||
| let mut incrementing_string = super::LineNumber::new(); | ||
| assert_eq!(b" 1\t", incrementing_string.buf.as_slice()); | ||
| incrementing_string.increment(); | ||
| assert_eq!(b" 2\t", incrementing_string.buf.as_slice()); | ||
| // Run through to 100 | ||
| for _ in 3..=100 { | ||
| incrementing_string.increment(); | ||
| } | ||
| assert_eq!(b" 100\t", incrementing_string.buf.as_slice()); | ||
| // Run through until we overflow the original size. | ||
| for _ in 101..=1000000 { | ||
| incrementing_string.increment(); | ||
| } | ||
| // Confirm that the buffer expands when we overflow the original size. | ||
| assert_eq!(b"1000000\t", incrementing_string.buf.as_slice()); | ||
| incrementing_string.increment(); | ||
| assert_eq!(b"1000001\t", incrementing_string.buf.as_slice()); | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks very similar to the
fast_incfunction I have here: https://github.com/uutils/coreutils/pull/7564/files#diff-8ef9575cb5c2b35e15751308bf63d6dbf3ac217a383790f9355bc5e13ab2fdb9R272I wonder if we should try to reuse that? (my function is a bit more generic as increment can be other values that aren't 1, so I'm not 100% sure how it'll do in terms of performance)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm inclined to leave my code as-is for now. I've done some benchmarking and even a small change in the logic I've written has a measurable difference on the overall performance, so I think moving to a generic function would slow things down quite a bit.
Do you think you'll ever implement a specialization for the common-case of adding exactly 1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No problem, happy to play with this after both our PR are merged ,-)
I'm hoping that by forcing
#[inline]the compiler will give us the specialized version for free.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I played with it anyway ,-P
Dirty commit here: 606655e gets within 1-2% of your implementation.
And 03a0481 gets 1% faster with an optimized version, but it's a bit unfortunate that we need to copy code (I can't see how we can avoid that). edit: bc689e5 ever more optimized...
I'll need to check what happens when I move the function to
uucore(not sure yet to understand how rust does things w.r.t. compile/link...)One difference is that I preallocate a larger buffer, so that the vector never needs to be grown (we'll never reach more than 1024 digits ,-P)
(again, I don't mean to block this PR, happy to look further after merge)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hey, for sure if there's a version in
uucorethat gives the same performance then let's use that 👍