/// Renders a byte slice as a `String` that is safe to embed in an error message.
///
/// The input is decoded chunk by chunk as UTF-8:
///
/// * Bytes that are not part of a valid UTF-8 sequence are written as a backslash followed by
///   exactly three octal digits (for example `0xFF` becomes `\377`).
/// * ASCII characters that are neither graphic nor ASCII whitespace (NUL, most control
///   characters including vertical tab, and DEL) are written the same way.
/// * Everything else passes through unchanged: printable ASCII, ASCII whitespace (space, tab,
///   line feed, form feed, carriage return) and all valid non-ASCII characters.
///
/// A literal backslash in the input is *not* escaped, so the result is meant for display only and
/// cannot be decoded back to the original bytes unambiguously.
pub(crate) fn escape_line(line: &[u8]) -> String {
    /// Appends `\NNN` (backslash plus three zero-padded octal digits) for `byte` to `out`.
    fn push_octal_escape(out: &mut String, byte: u8) {
        use std::fmt::Write as _;
        // Formatting into a `String` cannot fail, so the returned `Result` carries no information.
        let _ = write!(out, "\\{byte:03o}");
    }

    // Every input byte yields at least one output byte, so this is a lower bound that avoids the
    // first few reallocations without over-reserving for the common "nothing to escape" case.
    let mut result = String::with_capacity(line.len());

    for chunk in line.utf8_chunks() {
        for c in chunk.valid().chars() {
            let needs_escape = c.is_ascii() && !c.is_ascii_graphic() && !c.is_ascii_whitespace();
            if needs_escape {
                // `c` is ASCII in this branch, so narrowing to `u8` is lossless.
                push_octal_escape(&mut result, c as u8);
            } else {
                result.push(c);
            }
        }
        // Bytes that could not be decoded as UTF-8 are always escaped, one escape per byte.
        for &b in chunk.invalid() {
            push_octal_escape(&mut result, b);
        }
    }

    result
}
/// Input whose second line is not valid UTF-8: the first line is still converted and written to
/// stdout, then the run fails with exit code 2 and reports the bad bytes as an octal escape.
#[test]
fn test_invalid_utf8_input() {
    // Line 1 is the number "10"; line 2 is the single byte 0xFF, which is invalid UTF-8.
    let input: [u8; 4] = *b"10\n\xFF";
    let expected_stdout = "10\n";
    let expected_stderr = "numfmt: invalid number: '\\377'\n";

    new_ucmd!()
        .pipe_in(input)
        .fails_with_code(2)
        .stdout_is(expected_stdout)
        .stderr_is(expected_stderr);
}