From b40f15b72e7c15513ab0e61b4f751e2b9760f806 Mon Sep 17 00:00:00 2001 From: FidelSch Date: Fri, 27 Feb 2026 12:43:54 -0300 Subject: [PATCH 1/4] fix(numfmt): Read lines only up to null byte (as GNU does) --- src/uu/numfmt/src/numfmt.rs | 11 +++++++++-- tests/by-util/test_numfmt.rs | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index d0bedfa49fb..99c56de517a 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -72,11 +72,18 @@ fn write_line( input_line: &[u8], options: &NumfmtOptions, ) -> UResult<()> { + // Read lines only up to null byte (as GNU does) + let line = input_line + .iter() + .take_while(|&&b| b != b'\0') + .cloned() + .collect::>(); + let handled_line = if options.delimiter.is_some() { - write_formatted_with_delimiter(writer, input_line, options) + write_formatted_with_delimiter(writer, &line, options) } else { // Whitespace mode requires valid UTF-8 - match std::str::from_utf8(input_line) { + match std::str::from_utf8(&line) { Ok(s) => write_formatted_with_whitespace(writer, s, options), Err(_) => Err(translate!("numfmt-error-invalid-input")), } diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 6ba72d7907b..b6491352d1c 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -1249,3 +1249,24 @@ fn test_empty_delimiter_whitespace_rejection() { .fails_with_code(2) .stderr_contains("invalid suffix in input"); } + +#[test] +fn test_null_byte_input() { + new_ucmd!() + .pipe_in("1000\x00") + .succeeds() + .stdout_is("1000\n"); +} + +#[test] +fn test_null_byte_input_multiline() { + new_ucmd!() + .pipe_in("1000\x00\n2000\x00") + .succeeds() + .stdout_is("1000\n2000\n"); + + new_ucmd!() + .pipe_in("1000\x002000\n3000") + .succeeds() + .stdout_is("1000\n3000\n"); +} \ No newline at end of file From daa5a8df97feda745a589fc228885c5e37353dba Mon Sep 17 00:00:00 2001 From: FidelSch Date: Fri, 27 Feb 2026 12:59:41 -0300 Subject: [PATCH 2/4] fix(numfmt): Replace cloned() with copied() --- src/uu/numfmt/src/numfmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index 99c56de517a..b2d1eecfc66 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -76,7 +76,7 @@ fn write_line( let line = input_line .iter() .take_while(|&&b| b != b'\0') - .cloned() + .copied() .collect::>(); let handled_line = if options.delimiter.is_some() { From f6504707bdcdc08052a1c116e107d0c59e5f30eb Mon Sep 17 00:00:00 2001 From: FidelSch Date: Fri, 27 Feb 2026 14:20:10 -0300 Subject: [PATCH 3/4] format test_numfmt --- tests/by-util/test_numfmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index b6491352d1c..999e5b80ddd 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -1269,4 +1269,4 @@ fn test_null_byte_input_multiline() { .pipe_in("1000\x002000\n3000") .succeeds() .stdout_is("1000\n3000\n"); -} \ No newline at end of file +} From fb271c24c8043af4e83e832f84f32d0de62ad014 Mon Sep 17 00:00:00 2001 From: FidelSch Date: Sun, 1 Mar 2026 20:23:39 -0300 Subject: [PATCH 4/4] fix(numfmt): update line termination handling for consistency with GNU behavior --- src/uu/numfmt/src/format.rs | 20 ++++---- src/uu/numfmt/src/numfmt.rs | 75 +++++++++++++++++------------- tests/by-util/test_numfmt.rs | 90 ++++++++++++++++++------------------ 3 files changed, 97 insertions(+), 88 deletions(-) diff --git a/src/uu/numfmt/src/format.rs b/src/uu/numfmt/src/format.rs index 3a5db2ba060..2c55d8de305 100644 --- a/src/uu/numfmt/src/format.rs +++ b/src/uu/numfmt/src/format.rs @@ -473,6 +473,7 @@ pub fn write_formatted_with_delimiter( writer: &mut W, input: &[u8], options: &NumfmtOptions, + eol: Option, ) -> Result<()> { let delimiter = options.delimiter.as_deref().unwrap(); @@ -497,12 +498,9 @@ pub fn write_formatted_with_delimiter( } } - let eol = if options.zero_terminated { - b"\0" - } else { - b"\n" - }; - writer.write_all(eol).unwrap(); + if let Some(eol) = eol { + writer.write_all(&[eol]).unwrap(); + } Ok(()) } @@ -511,6 +509,7 @@ pub fn write_formatted_with_whitespace( writer: &mut W, s: &str, options: &NumfmtOptions, + eol: Option, ) -> Result<()> { for (n, (prefix, field)) in (1..).zip(WhitespaceSplitter { s: Some(s) }) { let field_selected = uucore::ranges::contain(&options.fields, n); @@ -548,12 +547,9 @@ pub fn write_formatted_with_whitespace( } } - let eol = if options.zero_terminated { - b"\0" - } else { - b"\n" - }; - writer.write_all(eol).unwrap(); + if let Some(eol) = eol { + writer.write_all(&[eol]).unwrap(); + } Ok(()) } diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index b2d1eecfc66..06de22df4ad 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -14,8 +14,7 @@ use crate::options::{ use crate::units::{Result, Unit}; use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser, parser::ValueSource}; use std::ffi::OsString; -use std::io::{BufRead, Error, Write as _, stderr}; -use std::result::Result as StdResult; +use std::io::{BufRead, Write as _, stderr}; use std::str::FromStr; use units::{IEC_BASES, SI_BASES}; @@ -33,37 +32,51 @@ mod units; fn handle_args<'a>(args: impl Iterator, options: &NumfmtOptions) -> UResult<()> { let mut stdout = std::io::stdout().lock(); + let terminator = if options.zero_terminated { 0u8 } else { b'\n' }; for l in args { - write_line(&mut stdout, l, options)?; + write_line(&mut stdout, l, options, Some(terminator))?; } Ok(()) } -fn handle_buffer(input: R, options: &NumfmtOptions) -> UResult<()> -where - R: BufRead, -{ +fn handle_buffer(mut input: R, options: &NumfmtOptions) -> UResult<()> { let terminator = if options.zero_terminated { 0u8 } else { b'\n' }; - handle_buffer_iterator(input.split(terminator), options, terminator) -} - -fn handle_buffer_iterator( - iter: impl Iterator, Error>>, - options: &NumfmtOptions, - terminator: u8, -) -> UResult<()> { let mut stdout = std::io::stdout().lock(); - for (idx, line_result) in iter.enumerate() { - match line_result { - Ok(line) if idx < options.header => { - stdout.write_all(&line)?; - stdout.write_all(&[terminator])?; - Ok(()) + let mut buf = Vec::new(); + let mut idx = 0; + + loop { + buf.clear(); + let n = input + .read_until(terminator, &mut buf) + .map_err(|e| NumfmtError::IoError(e.to_string()))?; + if n == 0 { + break; + } + + let has_terminator = buf.last() == Some(&terminator); + let line = if has_terminator { + &buf[..buf.len() - 1] + } else { + &buf[..] + }; + + // Emit the terminator only if the input line had one. + // i.e. if the last line of the input does not end with a newline, we should not add one. + let eol = has_terminator.then_some(terminator); + + if idx < options.header { + stdout.write_all(line)?; + if let Some(t) = eol { + stdout.write_all(&[t])?; } - Ok(line) => write_line(&mut stdout, &line, options), - Err(err) => return Err(Box::new(NumfmtError::IoError(err.to_string()))), - }?; + } else { + write_line(&mut stdout, line, options, eol)?; + } + + idx += 1; } + Ok(()) } @@ -71,6 +84,7 @@ fn write_line( writer: &mut W, input_line: &[u8], options: &NumfmtOptions, + eol: Option, ) -> UResult<()> { // Read lines only up to null byte (as GNU does) let line = input_line @@ -80,11 +94,11 @@ fn write_line( .collect::>(); let handled_line = if options.delimiter.is_some() { - write_formatted_with_delimiter(writer, &line, options) + write_formatted_with_delimiter(writer, &line, options, eol) } else { // Whitespace mode requires valid UTF-8 match std::str::from_utf8(&line) { - Ok(s) => write_formatted_with_whitespace(writer, s, options), + Ok(s) => write_formatted_with_whitespace(writer, s, options, eol), Err(_) => Err(translate!("numfmt-error-invalid-input")), } }; @@ -104,12 +118,9 @@ fn write_line( } writer.write_all(input_line)?; - let eol = if options.zero_terminated { - b"\0" - } else { - b"\n" - }; - writer.write_all(eol)?; + if let Some(eol) = eol { + writer.write_all(&[eol])?; + } } Ok(()) diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 999e5b80ddd..4e454ef5dec 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -33,7 +33,7 @@ fn test_from_si() { .args(&["--from=si"]) .pipe_in("1000\n1.1M\n0.1G") .succeeds() - .stdout_is("1000\n1100000\n100000000\n"); + .stdout_is("1000\n1100000\n100000000"); } #[test] @@ -42,7 +42,7 @@ fn test_from_iec() { .args(&["--from=iec"]) .pipe_in("1024\n1.1M\n0.1G") .succeeds() - .stdout_is("1024\n1153434\n107374183\n"); + .stdout_is("1024\n1153434\n107374183"); } #[test] @@ -51,7 +51,7 @@ fn test_from_iec_i() { .args(&["--from=iec-i"]) .pipe_in("1.1Mi\n0.1Gi") .succeeds() - .stdout_is("1153434\n107374183\n"); + .stdout_is("1153434\n107374183"); } #[test] @@ -84,7 +84,7 @@ fn test_from_auto() { .args(&["--from=auto"]) .pipe_in("1K\n1Ki") .succeeds() - .stdout_is("1000\n1024\n"); + .stdout_is("1000\n1024"); } #[test] @@ -93,7 +93,7 @@ fn test_to_si() { .args(&["--to=si"]) .pipe_in("1000\n1100000\n100000000") .succeeds() - .stdout_is("1.0k\n1.1M\n100M\n"); + .stdout_is("1.0k\n1.1M\n100M"); } #[test] @@ -102,7 +102,7 @@ fn test_to_iec() { .args(&["--to=iec"]) .pipe_in("1024\n1153434\n107374182") .succeeds() - .stdout_is("1.0K\n1.2M\n103M\n"); + .stdout_is("1.0K\n1.2M\n103M"); } #[test] @@ -111,7 +111,7 @@ fn test_to_iec_i() { .args(&["--to=iec-i"]) .pipe_in("1024\n1153434\n107374182") .succeeds() - .stdout_is("1.0Ki\n1.2Mi\n103Mi\n"); + .stdout_is("1.0Ki\n1.2Mi\n103Mi"); } #[test] @@ -128,7 +128,7 @@ fn test_padding() { .args(&["--from=si", "--padding=8"]) .pipe_in("1K\n1.1M\n0.1G") .succeeds() - .stdout_is(" 1000\n 1100000\n100000000\n"); + .stdout_is(" 1000\n 1100000\n100000000"); } #[test] @@ -137,7 +137,7 @@ fn test_negative_padding() { .args(&["--from=si", "--padding=-8"]) .pipe_in("1K\n1.1M\n0.1G") .succeeds() - .stdout_is("1000 \n1100000 \n100000000\n"); + .stdout_is("1000 \n1100000 \n100000000"); } #[test] @@ -146,7 +146,7 @@ fn test_header() { .args(&["--from=si", "--header=2"]) .pipe_in("header\nheader2\n1K\n1.1M\n0.1G") .succeeds() - .stdout_is("header\nheader2\n1000\n1100000\n100000000\n"); + .stdout_is("header\nheader2\n1000\n1100000\n100000000"); } #[test] @@ -155,7 +155,7 @@ fn test_header_default() { .args(&["--from=si", "--header"]) .pipe_in("header\n1K\n1.1M\n0.1G") .succeeds() - .stdout_is("header\n1000\n1100000\n100000000\n"); + .stdout_is("header\n1000\n1100000\n100000000"); } #[test] @@ -188,12 +188,12 @@ fn test_negative() { .args(&["--from=si"]) .pipe_in("-1000\n-1.1M\n-0.1G") .succeeds() - .stdout_is("-1000\n-1100000\n-100000000\n"); + .stdout_is("-1000\n-1100000\n-100000000"); new_ucmd!() .args(&["--to=iec-i"]) .pipe_in("-1024\n-1153434\n-107374182") .succeeds() - .stdout_is("-1.0Ki\n-1.2Mi\n-103Mi\n"); + .stdout_is("-1.0Ki\n-1.2Mi\n-103Mi"); } #[test] @@ -201,7 +201,7 @@ fn test_negative_zero() { new_ucmd!() .pipe_in("-0\n-0.0") .succeeds() - .stdout_is("0\n0.0\n"); + .stdout_is("0\n0.0"); } #[test] @@ -209,7 +209,7 @@ fn test_no_op() { new_ucmd!() .pipe_in("1024\n1234567") .succeeds() - .stdout_is("1024\n1234567\n"); + .stdout_is("1024\n1234567"); } #[test] @@ -218,7 +218,7 @@ fn test_normalize() { .args(&["--from=si", "--to=si"]) .pipe_in("10000000K\n0.001K") .succeeds() - .stdout_is("10G\n1\n"); + .stdout_is("10G\n1"); } #[test] @@ -322,14 +322,14 @@ fn test_should_skip_leading_space_from_stdin() { .args(&["--from=auto"]) .pipe_in(" 2Ki") .succeeds() - .stdout_is("2048\n"); + .stdout_is("2048"); // multi-line new_ucmd!() .args(&["--from=auto"]) .pipe_in("\t1Ki\n 2K") .succeeds() - .stdout_is("1024\n2000\n"); + .stdout_is("1024\n2000"); } #[test] @@ -338,7 +338,7 @@ fn test_should_convert_only_first_number_in_line() { .args(&["--from=auto"]) .pipe_in("1Ki 2M 3G") .succeeds() - .stdout_is("1024 2M 3G\n"); + .stdout_is("1024 2M 3G"); } #[test] @@ -347,13 +347,13 @@ fn test_leading_whitespace_should_imply_padding() { .args(&["--from=auto"]) .pipe_in(" 1K") .succeeds() - .stdout_is(" 1000\n"); + .stdout_is(" 1000"); new_ucmd!() .args(&["--from=auto"]) .pipe_in(" 202Ki") .succeeds() - .stdout_is(" 206848\n"); + .stdout_is(" 206848"); } #[test] @@ -362,7 +362,7 @@ fn test_should_calculate_implicit_padding_per_line() { .args(&["--from=auto"]) .pipe_in(" 1Ki\n 2K") .succeeds() - .stdout_is(" 1024\n 2000\n"); + .stdout_is(" 1024\n 2000"); } #[test] @@ -520,7 +520,7 @@ fn test_delimiter_only() { .args(&["-d", ","]) .pipe_in("1234,56") .succeeds() - .stdout_only("1234,56\n"); + .stdout_only("1234,56"); } #[test] @@ -529,7 +529,7 @@ fn test_line_is_field_with_no_delimiter() { .args(&["-d,", "--to=iec"]) .pipe_in("123456") .succeeds() - .stdout_only("121K\n"); + .stdout_only("121K"); } #[test] @@ -538,7 +538,7 @@ fn test_delimiter_to_si() { .args(&["-d=,", "--to=si"]) .pipe_in("1234,56") .succeeds() - .stdout_only("1.3k,56\n"); + .stdout_only("1.3k,56"); } #[test] @@ -547,7 +547,7 @@ fn test_delimiter_skips_leading_whitespace() { .args(&["-d=,", "--to=si"]) .pipe_in(" \t 1234,56") .succeeds() - .stdout_only("1.3k,56\n"); + .stdout_only("1.3k,56"); } #[test] @@ -556,7 +556,7 @@ fn test_delimiter_preserves_leading_whitespace_in_unselected_fields() { .args(&["-d=|", "--to=si"]) .pipe_in(" 1000| 2000") .succeeds() - .stdout_only("1.0k| 2000\n"); + .stdout_only("1.0k| 2000"); } #[test] @@ -565,7 +565,7 @@ fn test_delimiter_from_si() { .args(&["-d=,", "--from=si"]) .pipe_in("1.2K,56") .succeeds() - .stdout_only("1200,56\n"); + .stdout_only("1200,56"); } #[test] @@ -583,7 +583,7 @@ fn test_delimiter_with_padding() { .args(&["-d=|", "--to=si", "--padding=5"]) .pipe_in("1000|2000") .succeeds() - .stdout_only(" 1.0k|2000\n"); + .stdout_only(" 1.0k|2000"); } #[test] @@ -592,7 +592,7 @@ fn test_delimiter_with_padding_and_fields() { .args(&["-d=|", "--to=si", "--padding=5", "--field=-"]) .pipe_in("1000|2000") .succeeds() - .stdout_only(" 1.0k| 2.0k\n"); + .stdout_only(" 1.0k| 2.0k"); } #[test] @@ -655,7 +655,7 @@ fn test_suffix_is_added_if_not_supplied() { .args(&["--suffix=TEST"]) .pipe_in("1000") .succeeds() - .stdout_only("1000TEST\n"); + .stdout_only("1000TEST"); } #[test] @@ -664,7 +664,7 @@ fn test_suffix_is_preserved() { .args(&["--suffix=TEST"]) .pipe_in("1000TEST") .succeeds() - .stdout_only("1000TEST\n"); + .stdout_only("1000TEST"); } #[test] @@ -673,7 +673,7 @@ fn test_suffix_is_only_applied_to_selected_field() { .args(&["--suffix=TEST", "--field=2"]) .pipe_in("1000 2000 3000") .succeeds() - .stdout_only("1000 2000TEST 3000\n"); + .stdout_only("1000 2000TEST 3000"); } #[test] @@ -682,7 +682,7 @@ fn test_transform_with_suffix_on_input() { .args(&["--suffix=b", "--to=si"]) .pipe_in("2000b") .succeeds() - .stdout_only("2.0kb\n"); + .stdout_only("2.0kb"); } #[test] @@ -691,7 +691,7 @@ fn test_transform_without_suffix_on_input() { .args(&["--suffix=b", "--to=si"]) .pipe_in("2000") .succeeds() - .stdout_only("2.0kb\n"); + .stdout_only("2.0kb"); } #[test] @@ -700,7 +700,7 @@ fn test_transform_with_suffix_and_delimiter() { .args(&["--suffix=b", "--to=si", "-d=|"]) .pipe_in("1000b|2000|3000") .succeeds() - .stdout_only("1.0kb|2000|3000\n"); + .stdout_only("1.0kb|2000|3000"); } #[test] @@ -709,7 +709,7 @@ fn test_suffix_with_padding() { .args(&["--suffix=pad", "--padding=12"]) .pipe_in("1000 2000 3000") .succeeds() - .stdout_only(" 1000pad 2000 3000\n"); + .stdout_only(" 1000pad 2000 3000"); } #[test] @@ -732,7 +732,7 @@ fn test_invalid_stdin_number_with_warn_returns_status_0() { .args(&["--invalid=warn"]) .pipe_in("4Q") .succeeds() - .stdout_is("4Q\n") + .stdout_is("4Q") .stderr_is("numfmt: rejecting suffix in input: '4Q' (consider using --from)\n"); } @@ -742,7 +742,7 @@ fn test_invalid_stdin_number_with_ignore_returns_status_0() { .args(&["--invalid=ignore"]) .pipe_in("4Q") .succeeds() - .stdout_only("4Q\n"); + .stdout_only("4Q"); } #[test] @@ -760,7 +760,7 @@ fn test_invalid_stdin_number_with_fail_returns_status_2() { .args(&["--invalid=fail"]) .pipe_in("4Q") .fails_with_code(2) - .stdout_is("4Q\n") + .stdout_is("4Q") .stderr_is("numfmt: rejecting suffix in input: '4Q' (consider using --from)\n"); } @@ -1126,7 +1126,7 @@ fn test_zero_terminated_command_line_args() { #[test] fn test_zero_terminated_input() { let values = vec![ - ("1000", "1.0k\x00"), + ("1000", "1.0k"), ("1000\x00", "1.0k\x00"), ("1000\x002000\x00", "1.0k\x002.0k\x00"), ]; @@ -1253,9 +1253,11 @@ fn test_empty_delimiter_whitespace_rejection() { #[test] fn test_null_byte_input() { new_ucmd!() - .pipe_in("1000\x00") + .pipe_in("1000\x00\n") .succeeds() .stdout_is("1000\n"); + + new_ucmd!().pipe_in("1000\x00").succeeds().stdout_is("1000"); } #[test] @@ -1263,10 +1265,10 @@ fn test_null_byte_input_multiline() { new_ucmd!() .pipe_in("1000\x00\n2000\x00") .succeeds() - .stdout_is("1000\n2000\n"); + .stdout_is("1000\n2000"); new_ucmd!() .pipe_in("1000\x002000\n3000") .succeeds() - .stdout_is("1000\n3000\n"); + .stdout_is("1000\n3000"); }