diff --git a/src/uu/pr/locales/en-US.ftl b/src/uu/pr/locales/en-US.ftl index c7fa178e4bd..59af328dcb9 100644 --- a/src/uu/pr/locales/en-US.ftl +++ b/src/uu/pr/locales/en-US.ftl @@ -84,11 +84,14 @@ pr-help-indent = pr-help-join-lines = merge full lines, turns off -W line truncation, no column alignment, --sep-string[=STRING] sets separators +pr-help-expand-tabs = expand input CHARs (TABs) to tab WIDTH (8) pr-help-help = Print help information # Page header text pr-page = Page +pr-try-help-message = Try 'pr --help' for more information. + # Error messages pr-error-reading-input = pr: Reading from input {$file} gave error pr-error-unknown-filetype = pr: {$file}: unknown filetype @@ -98,3 +101,4 @@ pr-error-no-such-file = pr: cannot open {$file}, No such file or directory pr-error-column-merge-conflict = cannot specify number of columns when printing in parallel pr-error-across-merge-conflict = cannot specify both printing across and printing in parallel pr-error-invalid-pages-range = invalid --pages argument '{$start}:{$end}' +pr-error-invalid-expand-tab-argument ='-e' extra characters or invalid number in the argument: ‘{$arg}’ diff --git a/src/uu/pr/locales/fr-FR.ftl b/src/uu/pr/locales/fr-FR.ftl index af596787235..9d9a23407f8 100644 --- a/src/uu/pr/locales/fr-FR.ftl +++ b/src/uu/pr/locales/fr-FR.ftl @@ -83,11 +83,14 @@ pr-help-indent = pr-help-join-lines = fusionner les lignes complètes, désactive la troncature de ligne -W, aucun alignement de colonne, --sep-string[=CHAÎNE] définit les séparateurs +pr-help-expand-tabs = convertir les CHARs d'entrée (TABs) en largeur de tabulation WIDTH (8) pr-help-help = Afficher les informations d'aide # Texte d'en-tête de page pr-page = Page +pr-try-help-message = Essayez 'pr --help' pour plus d'informations. 
+ # Messages d'erreur pr-error-reading-input = pr : La lecture depuis l'entrée {$file} a donné une erreur pr-error-unknown-filetype = pr : {$file} : type de fichier inconnu @@ -97,3 +100,4 @@ pr-error-no-such-file = pr : impossible d'ouvrir {$file}, Aucun fichier ou répe pr-error-column-merge-conflict = impossible de spécifier le nombre de colonnes lors de l'impression en parallèle pr-error-across-merge-conflict = impossible de spécifier à la fois l'impression transversale et l'impression en parallèle pr-error-invalid-pages-range = argument --pages invalide '{$start}:{$end}' +pr-error-invalid-expand-tab-argument = Caractères supplémentaires ou nombre invalide dans l'argument de '-e': '{$arg}' diff --git a/src/uu/pr/src/pr.rs b/src/uu/pr/src/pr.rs index f000abbd72f..6e34452ad10 100644 --- a/src/uu/pr/src/pr.rs +++ b/src/uu/pr/src/pr.rs @@ -11,6 +11,7 @@ use itertools::Itertools; use regex::Regex; use std::fs::metadata; use std::io::{Read, Write, stderr, stdin, stdout}; +use std::str::Utf8Error; use std::string::FromUtf8Error; use std::time::SystemTime; use thiserror::Error; @@ -57,6 +58,7 @@ mod options { pub const JOIN_LINES: &str = "join-lines"; pub const HELP: &str = "help"; pub const FILES: &str = "files"; + pub const EXPAND_TABS: &str = "expand-tabs"; } struct OutputOptions { @@ -79,6 +81,7 @@ struct OutputOptions { join_lines: bool, col_sep_for_printing: String, line_width: Option, + expand_tabs: Option, } /// One line of an input file, annotated with file, page, and line number. @@ -96,10 +99,24 @@ impl FileLine { page_number: usize, line_number: usize, buf: &[u8], - ) -> Result { + options: &OutputOptions, + ) -> Result { // TODO Don't read bytes to String just to directly write them // out again anyway. 
- let line_content = String::from_utf8(buf.to_vec())?; + let line_content = if let Some(expand_tabs) = &options.expand_tabs { + // Anticipate a few expandable chars to reduce reallocations + let mut line_content = + String::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize); + // validate utf correctness + let s = std::str::from_utf8(buf)?; + for b in s.as_bytes() { + apply_expand_tab(&mut line_content, *b, expand_tabs); + } + line_content + } else { + String::from_utf8(buf.to_vec())? + }; + Ok(Self { file_id, page_number, @@ -123,6 +140,21 @@ struct NumberingMode { first_number: usize, } +#[derive(Debug)] +struct ExpandTabsOptions { + input_char: char, + width: i32, +} + +impl Default for ExpandTabsOptions { + fn default() -> Self { + Self { + width: 8, + input_char: TAB, + } + } +} + impl Default for NumberingMode { fn default() -> Self { Self { @@ -149,6 +181,14 @@ impl From for PrError { } } +impl From for PrError { + fn from(err: Utf8Error) -> Self { + Self::EncounteredErrors { + msg: err.to_string(), + } + } +} + #[derive(Debug, Error)] enum PrError { #[error("pr: {msg}")] @@ -326,6 +366,14 @@ pub fn uu_app() -> Command { .action(ArgAction::Append) .value_hint(clap::ValueHint::FilePath), ) + .arg( + Arg::new(options::EXPAND_TABS) + .long(options::EXPAND_TABS) + .short('e') + .num_args(1) + .value_name("[CHAR][WIDTH]") + .help(translate!("pr-help-expand-tabs")), + ) } #[uucore::main] @@ -390,6 +438,7 @@ fn recreate_arguments(args: &[String]) -> Vec { let column_page_option = Regex::new(r"^[-+]\d+.*").unwrap(); let num_regex = Regex::new(r"^[^-]\d*$").unwrap(); let n_regex = Regex::new(r"^-n\s*$").unwrap(); + let e_regex = Regex::new(r"^-e").unwrap(); let mut arguments = args.to_owned(); let num_option = args.iter().find_position(|x| n_regex.is_match(x.trim())); if let Some((pos, _value)) = num_option { @@ -402,6 +451,17 @@ fn recreate_arguments(args: &[String]) -> Vec { } } + // To ensure not to accidentally delete the next argument after a 
short flag for -e we insert + // the default values for the -e flag if '-e' is present without direct arguments. + let expand_tabs_option = arguments + .iter() + .find_position(|x| e_regex.is_match(x.trim())); + if let Some((pos, value)) = expand_tabs_option { + if value.trim().len() <= 2 { + arguments[pos] = "-e\t8".to_string(); + } + } + arguments .into_iter() .filter(|i| !column_page_option.is_match(i)) @@ -523,6 +583,26 @@ fn build_options( } }); + let expand_tabs = matches + .get_one::(options::EXPAND_TABS) + .map(|s| { + s.chars().next().map_or(Ok(ExpandTabsOptions::default()), |c| { + if c.is_ascii_digit() { + s + .parse() + .map_err(|_e| PrError::EncounteredErrors { msg: format!("{}\n{}", translate!("pr-error-invalid-expand-tab-argument", "arg" => s), translate!("pr-try-help-message")) }) + .map(|width| ExpandTabsOptions{input_char: TAB, width}) + } else if s.len() > 1 { + s[1..] + .parse() + .map_err(|_e| PrError::EncounteredErrors { msg: format!("{}\n{}", translate!("pr-error-invalid-expand-tab-argument", "arg" => &s[1..]), translate!("pr-try-help-message")) }) + .map(|width| ExpandTabsOptions{input_char: c, width}) + } else { + Ok(ExpandTabsOptions{input_char: c, width: 8}) + } + }) + }).transpose()?; + let double_space = matches.get_flag(options::DOUBLE_SPACE); let content_line_separator = if double_space { @@ -759,6 +839,7 @@ fn build_options( join_lines, col_sep_for_printing, line_width, + expand_tabs, }) } @@ -776,6 +857,27 @@ fn read_to_end(path: &str) -> Result, std::io::Error> { } } +fn apply_expand_tab(chunk: &mut String, byte: u8, expand_options: &ExpandTabsOptions) { + if byte == expand_options.input_char as u8 { + // If the byte encountered is the input char we use width to calculate + // the amount of spaces needed (if no input char given we stored '\t' + // in our struct) + let spaces_needed = + expand_options.width as usize - (chunk.len() % expand_options.width as usize); + chunk.extend(std::iter::repeat_n(' ', spaces_needed)); + } else if 
byte == TAB as u8 { + // If a byte got passed to the -e flag (eg -ea1) which is not '\t' GNU + // still expands it but does not use an optionally given width parameter + // but does the '\t' expansion with the default value (8) + let spaces_needed = 8 - (chunk.len() % 8); + chunk.extend(std::iter::repeat_n(' ', spaces_needed)); + } else { + // This arm means the byte is neither '\t' nor the bytes to be + // expanded + chunk.push(byte as char); + } +} + fn pr(path: &str, options: &OutputOptions) -> Result { // Read the entire contents of the file into a buffer. // @@ -805,7 +907,7 @@ fn get_pages( options: &OutputOptions, file_id: usize, buf: &[u8], -) -> Result)>, FromUtf8Error> { +) -> Result)>, PrError> { let start_page = options.start_page; let end_page = options.end_page; let lines_needed_per_page = lines_to_read_for_page(options); @@ -840,7 +942,8 @@ fn get_pages( // If the file has the pattern `\n\f`, don't treat the // `\f` as its own line; instead ignore the empty line. } else { - let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i])?; + let file_line = + FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?; page.push(file_line); } @@ -865,7 +968,8 @@ fn get_pages( // If the file has the pattern `\f\n`, don't treat the // `\n` as its own line; instead ignore the empty line. } else { - let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i])?; + let file_line = + FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?; page.push(file_line); line_num += 1; } @@ -887,7 +991,7 @@ fn get_pages( // Consider all trailing bytes as the last line. 
if prev < buf.len() { - let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..])?; + let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options)?; page.push(file_line); } diff --git a/tests/by-util/test_pr.rs b/tests/by-util/test_pr.rs index 9c8c4264eb0..02735a2ab33 100644 --- a/tests/by-util/test_pr.rs +++ b/tests/by-util/test_pr.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) Sdivide +// spell-checker:ignore (ToDO) Sdivide ading use jiff::{Timestamp, ToSpan}; use regex::Regex; @@ -756,3 +756,120 @@ fn test_merge_one_long_one_short() { .succeeds() .stdout_matches(&regex); } + +#[test] +fn test_simple_expand_tab() { + let whitespace = " ".repeat(50); + let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d"; + let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n"); + + let output_regex = Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap(); + + new_ucmd!() + .arg("-e") + .pipe_in("hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n") + .succeeds() + .stdout_matches(&output_regex); +} + +#[test] +fn test_simple_expand_tab_with_digit_argument() { + let whitespace = " ".repeat(50); + let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d"; + let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n"); + let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n"; + + let test_cases = vec![ + ("-e2", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()), + ("-e3", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()), + ("-e8", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()), + ("-e10", Regex::new(&format!("{page_1_beginning}hello 
world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()), + ]; + for (arg, output_regex) in test_cases { + new_ucmd!() + .arg(arg) + .pipe_in(input) + .succeeds() + .stdout_matches(&output_regex); + } +} + +#[test] +fn test_simple_expand_tab_with_char_argument() { + let whitespace = " ".repeat(50); + let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d"; + let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n"); + let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n"; + + let test_cases = vec![ + ("-ea", Regex::new(&format!("{page_1_beginning}hello world\n bc def\n le ding\ntr il \n8ch rs00 \n")).unwrap()), + ("-ee", Regex::new(&format!("{page_1_beginning}h llo world\nabc d f\n l ading\ntrail \n8chars00 \n")).unwrap()), + ]; + for (arg, output_regex) in test_cases { + new_ucmd!() + .arg(arg) + .pipe_in(input) + .succeeds() + .stdout_matches(&output_regex); + } +} + +#[test] +fn test_simple_expand_tab_with_both_arguments() { + // test different variations of what char to expand + // a2, e3, t10 + let whitespace = " ".repeat(50); + let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d"; + let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n"); + let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n"; + + let test_cases = vec![ + ("-ea2", Regex::new(&format!("{page_1_beginning}hello world\n bc def\n le ding\ntr il \n8ch rs00 \n")).unwrap()), + ("-ee3", Regex::new(&format!("{page_1_beginning}h llo world\nabc d f\n l ading\ntrail \n8chars00 \n")).unwrap()), + ("-et10", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\n rail \n8chars00 \n")).unwrap()), + ]; + for (arg, output_regex) in test_cases { + new_ucmd!() + .arg(arg) + .pipe_in(input) + .succeeds() + .stdout_matches(&output_regex); + } +} + +/* cSpell:disable */ +#[test] +fn test_invalid_expand_tab_arguments() { + let test_file_path = "empty_test_file"; + + let test_cases = vec![ + // incorrect argument + 
("-esdgjiojiosdgjiogd", "dgjiojiosdgjiogd"), + // 2 non digit parameter + ("-eab", "b"), + // non digit after first digit + ("-e1a", "1a"), + // non digit after first digit after allowed input char + ("-ea1a", "1a"), + // > i32 max + ("-e2147483648", "2147483648"), + // > i32 max after allowed input char + ("-ea2147483648", "2147483648"), + ]; + + for (arg, error_msg_field) in test_cases { + new_ucmd!() + .args(&[arg, test_file_path]) + .fails() + .stderr_contains(format!("pr: '-e' extra characters or invalid number in the argument: ‘{error_msg_field}’\nTry 'pr --help' for more information.")); + } +} +/* cSpell:enable */ + +#[test] +fn test_expand_tab_does_not_consume_next_argument() { + let test_file_path = "empty_test_file"; + new_ucmd!().args(&["-e", test_file_path]).succeeds(); + new_ucmd!().args(&["-ea", test_file_path]).succeeds(); + new_ucmd!().args(&["-ea1", test_file_path]).succeeds(); +} diff --git a/tests/fixtures/pr/empty_test_file b/tests/fixtures/pr/empty_test_file new file mode 100644 index 00000000000..e69de29bb2d