Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/uu/numfmt/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,26 @@ fn format_string(
))
}

/// Renders arbitrary bytes as a `String` that is safe to embed in an error message.
///
/// Bytes that do not form valid UTF-8, and ASCII characters that are neither
/// graphic nor ASCII whitespace (for example NUL or DEL), are written as a
/// backslash followed by three octal digits. Everything else — printable ASCII,
/// ASCII whitespace, and well-formed multi-byte UTF-8 characters — is copied
/// through unchanged.
pub(crate) fn escape_line(line: &[u8]) -> String {
    /// Appends `byte` to `out` as `\ooo` (three zero-padded octal digits).
    fn push_octal(out: &mut String, byte: u8) {
        out.push_str(&format!("\\{byte:03o}"));
    }

    let mut escaped = String::new();
    // `utf8_chunks` is a standard-library slice method: each chunk exposes a
    // valid UTF-8 run via `valid()` and the offending bytes after it via `invalid()`.
    for chunk in line.utf8_chunks() {
        for ch in chunk.valid().chars() {
            let keep_as_is = !ch.is_ascii() || ch.is_ascii_graphic() || ch.is_ascii_whitespace();
            if keep_as_is {
                escaped.push(ch);
            } else {
                // `ch` is ASCII in this branch, so the cast to `u8` is lossless.
                push_octal(&mut escaped, ch as u8);
            }
        }
        chunk
            .invalid()
            .iter()
            .for_each(|&byte| push_octal(&mut escaped, byte));
    }
    escaped
}

fn split_bytes<'a>(input: &'a [u8], delim: &'a [u8]) -> impl Iterator<Item = &'a [u8]> {
let mut remainder = Some(input);
std::iter::from_fn(move || {
Expand Down Expand Up @@ -488,7 +508,7 @@ pub fn write_formatted_with_delimiter<W: std::io::Write>(
if field_selected {
// Field must be valid UTF-8 for numeric conversion
let field_str = std::str::from_utf8(field)
.map_err(|_| translate!("numfmt-error-invalid-number", "input" => String::from_utf8_lossy(field).into_owned().quote()))?
.map_err(|_| translate!("numfmt-error-invalid-number", "input" => escape_line(field).quote()))?
.trim_start();
let formatted = format_string(field_str, options, None)?;
writer.write_all(formatted.as_bytes()).unwrap();
Expand Down
6 changes: 4 additions & 2 deletions src/uu/numfmt/src/numfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// file that was distributed with this source code.

use crate::errors::NumfmtError;
use crate::format::{write_formatted_with_delimiter, write_formatted_with_whitespace};
use crate::format::{escape_line, write_formatted_with_delimiter, write_formatted_with_whitespace};
use crate::options::{
DEBUG, DELIMITER, FIELD, FIELD_DEFAULT, FORMAT, FROM, FROM_DEFAULT, FROM_UNIT,
FROM_UNIT_DEFAULT, FormatOptions, HEADER, HEADER_DEFAULT, INVALID, InvalidModes, NUMBER,
Expand Down Expand Up @@ -99,7 +99,9 @@ fn write_line<W: std::io::Write>(
// Whitespace mode requires valid UTF-8
match std::str::from_utf8(&line) {
Ok(s) => write_formatted_with_whitespace(writer, s, options, eol),
Err(_) => Err(translate!("numfmt-error-invalid-input")),
Err(_) => Err(
translate!("numfmt-error-invalid-number", "input" => escape_line(&line).quote()),
),
}
};

Expand Down
10 changes: 10 additions & 0 deletions tests/by-util/test_numfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1272,3 +1272,13 @@ fn test_null_byte_input_multiline() {
.succeeds()
.stdout_is("1000\n3000");
}

#[test]
fn test_invalid_utf8_input() {
    // First line is a well-formed number; the second line is the lone byte
    // 0xFF, which is invalid UTF-8.
    let input: [u8; 4] = *b"10\n\xFF";
    // The invalid byte is expected to show up in the message as an octal escape.
    let expected_stderr = "numfmt: invalid number: '\\377'\n";

    new_ucmd!()
        .pipe_in(input)
        .fails_with_code(2)
        .stdout_is("10\n")
        .stderr_is(expected_stderr);
}
Loading