diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index 71d4756a70d..8049f5a919a 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -260,57 +260,33 @@ fn next_tab_stop(col_count: usize) -> usize {
 }
 
 fn compute_col_count(buffer: &[u8], mode: WidthMode) -> usize {
-    match mode {
-        WidthMode::Characters => {
-            if let Ok(s) = std::str::from_utf8(buffer) {
-                let mut width = 0;
-                for ch in s.chars() {
-                    match ch {
-                        '\r' => width = 0,
-                        '\t' => width = next_tab_stop(width),
-                        '\x08' => width = width.saturating_sub(1),
-                        _ => width += 1,
+    if let Ok(s) = std::str::from_utf8(buffer) {
+        let mut width = 0;
+        for ch in s.chars() {
+            match ch {
+                '\r' => width = 0,
+                '\t' => width = next_tab_stop(width),
+                '\x08' => width = width.saturating_sub(1),
+                _ => {
+                    width += match mode {
+                        WidthMode::Characters => 1,
+                        WidthMode::Columns => UnicodeWidthChar::width(ch).unwrap_or(0),
                     }
                 }
-                width
-            } else {
-                let mut width = 0;
-                for &byte in buffer {
-                    match byte {
-                        CR => width = 0,
-                        TAB => width = next_tab_stop(width),
-                        0x08 => width = width.saturating_sub(1),
-                        _ => width += 1,
-                    }
-                }
-                width
             }
         }
-        WidthMode::Columns => {
-            if let Ok(s) = std::str::from_utf8(buffer) {
-                let mut width = 0;
-                for ch in s.chars() {
-                    match ch {
-                        '\r' => width = 0,
-                        '\t' => width = next_tab_stop(width),
-                        '\x08' => width = width.saturating_sub(1),
-                        _ => width += UnicodeWidthChar::width(ch).unwrap_or(0),
-                    }
-                }
-                width
-            } else {
-                let mut width = 0;
-                for &byte in buffer {
-                    match byte {
-                        CR => width = 0,
-                        TAB => width = next_tab_stop(width),
-                        0x08 => width = width.saturating_sub(1),
-                        _ => width += 1,
-                    }
-                }
-                width
+        width
+    } else {
+        let mut width = 0;
+        for &byte in buffer {
+            match byte {
+                CR => width = 0,
+                TAB => width = next_tab_stop(width),
+                0x08 => width = width.saturating_sub(1),
+                _ => width += 1,
             }
         }
+        width
     }
 }
 
diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs
index c6ae6b56da0..c371ba4ca58 100644
--- a/tests/by-util/test_fold.rs
+++ b/tests/by-util/test_fold.rs
@@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore fullwidth
+// spell-checker:ignore fullwidth refgh tefgh nefgh
 
 use bytecount::count;
 use unicode_width::UnicodeWidthChar;
@@ -958,3 +958,37 @@ fn test_fullwidth_characters() {
         .succeeds()
         .stdout_is(format!("{e_fullwidth}\n{e_fullwidth}"));
 }
+
+#[test]
+fn test_character_mode_special_chars() {
+    for (args, input, expected) in [
+        // backspace decreases column
+        (&["-c", "-w", "5"][..], "abcde\x08fg\n", "abcde\x08f\ng\n"),
+        // carriage return resets column
+        (&["-c", "-w", "5"], "abcd\refgh\n", "abcd\refgh\n"),
+        // tab at start exceeds width
+        (&["-c", "-w", "4"], "\tabc\n", "\t\nabc\n"),
+        // multiple tabs
+        (&["-c", "-w", "10"], "a\tb\tc\n", "a\tb\n\tc\n"),
+        // basic folding
+        (&["-c", "-w", "3"], "abcdef\n", "abc\ndef\n"),
+        // preserves empty lines
+        (&["-c", "-w", "5"], "abc\n\ndef\n", "abc\n\ndef\n"),
+        // word boundary with -s
+        (&["-c", "-s", "-w", "5"], "ab cd ef\n", "ab \ncd ef\n"),
+        // tab as word boundary
+        (&["-c", "-s", "-w", "10"], "abcd\tefgh\n", "abcd\t\nefgh\n"),
+        // wide chars count as 1 in -c mode
+        (
+            &["-c", "-w", "3"],
+            "\u{FF1A}\u{FF1A}\u{FF1A}\u{FF1A}\n",
+            "\u{FF1A}\u{FF1A}\u{FF1A}\n\u{FF1A}\n",
+        ),
+    ] {
+        new_ucmd!()
+            .args(args)
+            .pipe_in(input)
+            .succeeds()
+            .stdout_is(expected);
+    }
+}