diff --git a/Cargo.lock b/Cargo.lock index 78e401b03c7..a021c4fecd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3835,6 +3835,9 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", + "libc", + "num-bigint", + "num-traits", "thiserror 2.0.18", "uucore", ] diff --git a/src/uu/numfmt/Cargo.toml b/src/uu/numfmt/Cargo.toml index fed39ad68f3..e202f61fab8 100644 --- a/src/uu/numfmt/Cargo.toml +++ b/src/uu/numfmt/Cargo.toml @@ -22,6 +22,9 @@ clap = { workspace = true } uucore = { workspace = true, features = ["parser", "ranges"] } thiserror = { workspace = true } fluent = { workspace = true } +libc = { workspace = true } +num-bigint = { workspace = true } +num-traits = { workspace = true } [dev-dependencies] divan = { workspace = true } diff --git a/src/uu/numfmt/locales/en-US.ftl b/src/uu/numfmt/locales/en-US.ftl index 4e5faf56e7f..4fb87038cb1 100644 --- a/src/uu/numfmt/locales/en-US.ftl +++ b/src/uu/numfmt/locales/en-US.ftl @@ -40,6 +40,7 @@ numfmt-help-debug = print warnings about invalid input numfmt-help-delimiter = use X instead of whitespace for field delimiter numfmt-help-field = replace the numbers in these input fields; see FIELDS below numfmt-help-format = use printf style floating-point FORMAT; see FORMAT below for details +numfmt-help-grouping = use locale-specific grouping separators for output numfmt-help-from = auto-scale input numbers to UNITs; see UNIT below numfmt-help-from-unit = specify the input unit size numfmt-help-to = auto-scale output numbers to UNITs; see UNIT below @@ -49,6 +50,7 @@ numfmt-help-header = print (without converting) the first N header lines; N defa numfmt-help-round = use METHOD for rounding when scaling numfmt-help-suffix = print SUFFIX after each formatted number, and accept inputs optionally ending with SUFFIX numfmt-help-unit-separator = use STRING to separate the number from any unit when printing; by default, no separator is used +numfmt-help-debug = print warnings about potential problems during processing numfmt-help-invalid = set the failure mode for invalid input numfmt-help-zero-terminated = line delimiter is NUL, not newline @@ -58,6 +60,7 @@ numfmt-error-invalid-unit-size = invalid unit size: { $size } numfmt-error-invalid-padding = invalid padding value { $value } numfmt-error-invalid-header = invalid header value { $value } numfmt-error-grouping-cannot-be-combined-with-to = grouping cannot be combined with --to +numfmt-error-grouping-cannot-be-combined-with-format = --grouping cannot be combined with --format numfmt-error-delimiter-must-be-single-character = the delimiter must be a single character numfmt-error-invalid-number-empty = invalid number: '' numfmt-error-invalid-specific-suffix = invalid suffix in input { $input }: { $suffix } @@ -68,6 +71,10 @@ numfmt-error-rejecting-suffix = rejecting suffix in input: '{ $number }{ $suffix numfmt-error-suffix-unsupported-for-unit = This suffix is unsupported for specified unit numfmt-error-unit-auto-not-supported-with-to = Unit 'auto' isn't supported with --to options numfmt-error-number-too-big = Number is too big and unsupported +numfmt-error-value-too-large-to-be-converted = value too large to be converted: { $input } +numfmt-error-value-too-large-to-be-printed = value too large to be printed: { $value } (consider using --to) +numfmt-error-value-precision-too-large-to-be-printed = value/precision too large to be printed: { $value } (consider using --to) +numfmt-error-value-too-large-to-be-printed-max = value too large to be printed: { $value } (cannot handle values > 999Q) numfmt-error-format-no-percent = format '{ $format }' has no % directive numfmt-error-format-ends-in-percent = format '{ $format }' ends in % numfmt-error-invalid-format-directive = invalid format '{ $format }', directive must be %[0]['][-][N][.][N]f @@ -75,6 +82,7 @@ numfmt-error-invalid-format-width-overflow = invalid format '{ $format }' (width numfmt-error-invalid-precision = invalid precision in format '{ $format }' numfmt-error-format-too-many-percent = format '{ $format }' has too many % directives numfmt-error-unknown-invalid-mode = Unknown invalid mode: { $mode } +numfmt-error-multiple-field-specifications = multiple field specifications # Debug messages numfmt-debug-no-conversion = no conversion option specified diff --git a/src/uu/numfmt/locales/fr-FR.ftl b/src/uu/numfmt/locales/fr-FR.ftl index 20bd91db9a9..8443985500b 100644 --- a/src/uu/numfmt/locales/fr-FR.ftl +++ b/src/uu/numfmt/locales/fr-FR.ftl @@ -39,6 +39,7 @@ numfmt-after-help = Options d'UNITÉ : numfmt-help-delimiter = utiliser X au lieu d'espaces pour le délimiteur de champ numfmt-help-field = remplacer les nombres dans ces champs d'entrée ; voir FIELDS ci-dessous numfmt-help-format = utiliser le FORMAT à virgule flottante de style printf ; voir FORMAT ci-dessous pour les détails +numfmt-help-grouping = utiliser les séparateurs de groupement de la locale pour la sortie numfmt-help-from = mettre automatiquement à l'échelle les nombres d'entrée vers les UNITÉs ; voir UNIT ci-dessous numfmt-help-from-unit = spécifier la taille de l'unité d'entrée numfmt-help-to = mettre automatiquement à l'échelle les nombres de sortie vers les UNITÉs ; voir UNIT ci-dessous @@ -47,7 +48,9 @@ numfmt-help-padding = remplir la sortie à N caractères ; N positif alignera à numfmt-help-header = imprimer (sans convertir) les N premières lignes d'en-tête ; N vaut 1 par défaut si non spécifié numfmt-help-round = utiliser METHOD pour l'arrondi lors de la mise à l'échelle numfmt-help-suffix = imprimer SUFFIX après chaque nombre formaté, et accepter les entrées se terminant optionnellement par SUFFIX +numfmt-help-unit-separator = utiliser STRING pour séparer le nombre de toute unité lors de l'affichage ; par défaut, aucun séparateur n'est utilisé numfmt-help-invalid = définir le mode d'échec pour les entrées invalides +numfmt-help-debug = afficher des avertissements sur d'éventuels problèmes de traitement numfmt-help-zero-terminated = le délimiteur de ligne est NUL, pas retour à la ligne # Messages d'erreur @@ -56,6 +59,7 @@ numfmt-error-invalid-unit-size = taille d'unité invalide : { $size } numfmt-error-invalid-padding = valeur de remplissage invalide { $value } numfmt-error-invalid-header = valeur d'en-tête invalide { $value } numfmt-error-grouping-cannot-be-combined-with-to = le groupement ne peut pas être combiné avec --to +numfmt-error-grouping-cannot-be-combined-with-format = --grouping ne peut pas être combiné avec --format numfmt-error-delimiter-must-be-single-character = le délimiteur doit être un seul caractère numfmt-error-invalid-number-empty = nombre invalide : '' numfmt-error-invalid-suffix = suffixe invalide dans l'entrée : { $input } @@ -66,6 +70,10 @@ numfmt-error-rejecting-suffix = rejet du suffixe dans l'entrée : '{ $number }{ numfmt-error-suffix-unsupported-for-unit = Ce suffixe n'est pas supporté pour l'unité spécifiée numfmt-error-unit-auto-not-supported-with-to = L'unité 'auto' n'est pas supportée avec les options --to numfmt-error-number-too-big = Le nombre est trop grand et non supporté +numfmt-error-value-too-large-to-be-converted = valeur trop grande pour être convertie : { $input } +numfmt-error-value-too-large-to-be-printed = valeur trop grande pour être affichée : { $value } (envisagez d'utiliser --to) +numfmt-error-value-precision-too-large-to-be-printed = valeur/précision trop grande pour être affichée : { $value } (envisagez d'utiliser --to) +numfmt-error-value-too-large-to-be-printed-max = valeur trop grande pour être affichée : { $value } (valeurs > 999Q non prises en charge) numfmt-error-format-no-percent = le format '{ $format }' n'a pas de directive % numfmt-error-format-ends-in-percent = le format '{ $format }' se termine par % numfmt-error-invalid-format-directive = format invalide '{ $format }', la directive doit être %[0]['][-][N][.][N]f @@ -73,3 +81,4 @@ numfmt-error-invalid-format-width-overflow = format invalide '{ $format }' (déb numfmt-error-invalid-precision = précision invalide dans le format '{ $format }' numfmt-error-format-too-many-percent = le format '{ $format }' a trop de directives % numfmt-error-unknown-invalid-mode = Mode invalide inconnu : { $mode } +numfmt-error-multiple-field-specifications = plusieurs spécifications de champs diff --git a/src/uu/numfmt/src/format.rs b/src/uu/numfmt/src/format.rs index 3b1f41aa9d5..f73cdde0172 100644 --- a/src/uu/numfmt/src/format.rs +++ b/src/uu/numfmt/src/format.rs @@ -2,13 +2,21 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore powf +// spell-checker:ignore powf localeconv +use num_bigint::BigUint; +use num_traits::Zero; +use std::io::Write; use uucore::display::Quotable; use uucore::translate; -use crate::options::{NumfmtOptions, RoundMethod, TransformOptions}; +use crate::options::{FormatOptions, NumfmtOptions, RoundMethod, TransformOptions}; use crate::units::{DisplayableSuffix, IEC_BASES, RawSuffix, Result, SI_BASES, Suffix, Unit}; +#[cfg(not(windows))] +use std::ffi::CStr; +#[cfg(not(windows))] +use std::sync::Once; + /// Iterate over a line's fields, where each field is a contiguous sequence of /// non-whitespace, optionally prefixed with one or more characters of leading /// whitespace. Fields are returned as tuples of `(prefix, field)`. @@ -16,7 +24,7 @@ use crate::units::{DisplayableSuffix, IEC_BASES, RawSuffix, Result, SI_BASES, Su /// # Examples: /// /// ``` -/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some(" 1234 5") }; +/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some(" 1234 5"), skip_whitespace: None }; /// /// assert_eq!(Some((" ", "1234")), fields.next()); /// assert_eq!(Some((" ", "5")), fields.next()); @@ -28,17 +36,27 @@ use crate::units::{DisplayableSuffix, IEC_BASES, RawSuffix, Result, SI_BASES, Su /// empty): /// /// ``` -/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some("first second") }; +/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some("first second"), skip_whitespace: None }; /// /// assert_eq!(Some(("", "first")), fields.next()); /// assert_eq!(Some((" ", "second")), fields.next()); /// -/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some("") }; +/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some(""), skip_whitespace: None }; /// /// assert_eq!(Some(("", "")), fields.next()); /// ``` pub struct WhitespaceSplitter<'a> { pub s: Option<&'a str>, + pub skip_whitespace: Option, +} + +fn is_field_whitespace(c: char) -> bool { + // Treat NBSP-like characters as part of a field, not as separators. + // This matches GNU numfmt's handling in locale-sensitive tests. + if matches!(c, '\u{00A0}' | '\u{2007}' | '\u{202F}' | '\u{2060}') { + return false; + } + c.is_whitespace() } impl<'a> Iterator for WhitespaceSplitter<'a> { @@ -48,13 +66,19 @@ impl<'a> Iterator for WhitespaceSplitter<'a> { fn next(&mut self) -> Option { let haystack = self.s?; - let (prefix, field) = haystack.split_at( - haystack - .find(|c: char| !c.is_whitespace()) - .unwrap_or(haystack.len()), - ); + let is_ws = |c: char| { + if let Some(skip) = self.skip_whitespace { + if c == skip { + return false; + } + } + is_field_whitespace(c) + }; - let (field, rest) = field.split_at(field.find(char::is_whitespace).unwrap_or(field.len())); + let (prefix, field) = + haystack.split_at(haystack.find(|c: char| !is_ws(c)).unwrap_or(haystack.len())); + + let (field, rest) = field.split_at(field.find(is_ws).unwrap_or(field.len())); self.s = if rest.is_empty() { None } else { Some(rest) }; @@ -62,6 +86,427 @@ impl<'a> Iterator for WhitespaceSplitter<'a> { } } +fn is_blank_for_suffix(c: char) -> bool { + matches!( + c, + ' ' | '\t' | '\u{00A0}' | '\u{2007}' | '\u{202F}' | '\u{2060}' | '\u{2003}' + ) +} + +fn trim_trailing_blanks(s: &str) -> &str { + s.trim_end_matches(is_blank_for_suffix) +} + +#[cfg(not(windows))] +fn is_c_locale() -> bool { + for key in ["LC_ALL", "LC_NUMERIC", "LANG"] { + if let Ok(value) = std::env::var(key) { + if value.is_empty() { + continue; + } + let lang = value.split('.').next().unwrap_or(&value); + if lang == "C" || lang == "POSIX" || lang.starts_with("C_") || lang.starts_with("C@") { + return true; + } + return false; + } + } + false +} + +#[cfg(not(windows))] +fn init_locale() { + static INIT: Once = Once::new(); + INIT.call_once(|| unsafe { + let _ = libc::setlocale(libc::LC_ALL, c"".as_ptr()); + }); +} + +#[cfg(windows)] +fn init_locale() {} + +#[cfg(not(windows))] +fn locale_decimal_separator_char() -> char { + if is_c_locale() { + return '.'; + } + init_locale(); + unsafe { + let conv = libc::localeconv(); + if conv.is_null() { + return '.'; + } + let c_str = CStr::from_ptr((*conv).decimal_point); + c_str.to_string_lossy().chars().next().unwrap_or('.') + } +} + +#[cfg(windows)] +fn locale_decimal_separator_char() -> char { + // libc::localeconv is unavailable on Windows in this build. + init_locale(); + '.' +} + +#[cfg(not(windows))] +pub(crate) fn locale_grouping_separator_string() -> Option { + if is_c_locale() { + return None; + } + init_locale(); + unsafe { + let conv = libc::localeconv(); + if conv.is_null() { + return None; + } + let c_str = CStr::from_ptr((*conv).thousands_sep); + let sep = c_str.to_string_lossy().to_string(); + if sep.is_empty() { None } else { Some(sep) } + } +} + +#[cfg(windows)] +pub(crate) fn locale_grouping_separator_string() -> Option { + // Locale grouping is not supported on Windows in this implementation. + init_locale(); + None +} + +fn locale_grouping_separator_char() -> Option { + locale_grouping_separator_string().and_then(|sep| sep.chars().next()) +} + +fn decimal_separator_count(s: &str, decimal_sep: char) -> usize { + s.chars().filter(|&c| c == decimal_sep).count() +} + +struct NumberScan { + end: usize, + normalized: String, + digits: usize, +} + +const MAX_CONVERSION_DIGITS: usize = 33; +const MAX_UNSCALED_OUTPUT_DIGITS: usize = 19; + +struct ParsedNumber { + value: f64, + suffix: Option, + scan: NumberScan, +} + +#[derive(Clone)] +struct DecimalValue { + negative: bool, + int_raw: String, + frac_raw: String, + digits: BigUint, + scale: usize, + is_zero: bool, +} + +fn scan_number_prefix( + s: &str, + decimal_sep: char, + grouping_sep: Option, +) -> Option { + let mut chars = s.char_indices().peekable(); + let mut normalized = String::new(); + let mut digits_before = 0usize; + let mut digits_after = 0usize; + let mut seen_decimal = false; + let mut end = 0usize; + + if let Some((idx, ch)) = chars.peek() { + if *ch == '-' || *ch == '+' { + normalized.push(*ch); + end = idx + ch.len_utf8(); + chars.next(); + } + } + + for (idx, ch) in chars { + if ch.is_ascii_digit() { + if seen_decimal { + digits_after += 1; + } else { + digits_before += 1; + } + normalized.push(ch); + end = idx + ch.len_utf8(); + continue; + } + + if ch == decimal_sep { + if seen_decimal { + break; + } + seen_decimal = true; + normalized.push('.'); + end = idx + ch.len_utf8(); + continue; + } + + if grouping_sep.is_some_and(|sep| sep == ch) { + end = idx + ch.len_utf8(); + continue; + } + + break; + } + + let digits = digits_before + digits_after; + if digits == 0 { + return None; + } + if seen_decimal && digits_after == 0 { + return None; + } + + Some(NumberScan { + end, + normalized, + digits, + }) +} + +fn total_significant_digits(scan: &NumberScan) -> usize { + let mut s = scan.normalized.as_str(); + if let Some(rest) = s.strip_prefix('-') { + s = rest; + } else if let Some(rest) = s.strip_prefix('+') { + s = rest; + } + let (int_raw, frac_raw) = s.split_once('.').unwrap_or((s, "")); + let int_trimmed = int_raw.trim_start_matches('0'); + if int_trimmed.is_empty() { + let frac_trimmed = frac_raw.trim_start_matches('0'); + if frac_trimmed.is_empty() { + 1 + } else { + frac_raw.len() + } + } else { + int_trimmed.len() + frac_raw.len() + } +} + +impl DecimalValue { + fn from_scan(scan: &NumberScan) -> Self { + let mut s = scan.normalized.as_str(); + let mut negative = false; + if let Some(rest) = s.strip_prefix('-') { + negative = true; + s = rest; + } else if let Some(rest) = s.strip_prefix('+') { + s = rest; + } + + let (int_raw, frac_raw) = s.split_once('.').unwrap_or((s, "")); + let digits_all = format!("{int_raw}{frac_raw}"); + let digits_trimmed = digits_all.trim_start_matches('0'); + let is_zero = digits_trimmed.is_empty(); + let digits = if is_zero { + BigUint::zero() + } else { + BigUint::parse_bytes(digits_trimmed.as_bytes(), 10).unwrap_or_else(BigUint::zero) + }; + + Self { + negative, + int_raw: int_raw.to_string(), + frac_raw: frac_raw.to_string(), + digits, + scale: frac_raw.len(), + is_zero, + } + } + + fn normalized_string(&self) -> String { + let int_trimmed = self.int_raw.trim_start_matches('0'); + let int_out = if int_trimmed.is_empty() { + "0" + } else { + int_trimmed + }; + let mut out = String::new(); + if self.negative && !self.is_zero { + out.push('-'); + } + out.push_str(int_out); + if !self.frac_raw.is_empty() { + out.push('.'); + out.push_str(&self.frac_raw); + } + out + } + + fn apply_multiplier(&mut self, multiplier: &BigUint) { + if !self.is_zero { + self.digits *= multiplier; + } + } + + fn integer_part(&self) -> BigUint { + if self.scale == 0 { + return self.digits.clone(); + } + if self.is_zero { + return BigUint::zero(); + } + let divisor = pow10(self.scale); + &self.digits / divisor + } + + fn integer_part_len(&self) -> usize { + let int_str = self.integer_part().to_str_radix(10); + int_str.len() + } + + fn scientific_notation(&self) -> String { + let int_str = self.integer_part().to_str_radix(10); + if int_str == "0" { + return "0".to_string(); + } + let mantissa = int_str.chars().next().unwrap_or('0'); + let exp = int_str.len().saturating_sub(1); + let sign = if self.negative && !self.is_zero { + "-" + } else { + "" + }; + format!("{sign}{mantissa}e+{exp}") + } +} + +fn pow10(exp: usize) -> BigUint { + BigUint::from(10u8).pow(exp as u32) +} + +fn pow1024(exp: usize) -> BigUint { + BigUint::from(1024u32).pow(exp as u32) +} + +fn suffix_index(raw: RawSuffix) -> usize { + match raw { + RawSuffix::K => 1, + RawSuffix::M => 2, + RawSuffix::G => 3, + RawSuffix::T => 4, + RawSuffix::P => 5, + RawSuffix::E => 6, + RawSuffix::Z => 7, + RawSuffix::Y => 8, + RawSuffix::R => 9, + RawSuffix::Q => 10, + } +} + +fn suffix_multiplier(suffix: Option, unit: &Unit) -> Result { + let Some((raw, with_i)) = suffix else { + return Ok(BigUint::from(1u8)); + }; + let idx = suffix_index(raw); + match unit { + Unit::Si => Ok(pow10(idx * 3)), + Unit::Iec(_) => Ok(pow1024(idx)), + Unit::Auto => { + if with_i { + Ok(pow1024(idx)) + } else { + Ok(pow10(idx * 3)) + } + } + Unit::None => Ok(BigUint::from(1u8)), + } +} + +fn max_scaled_value(unit: &Unit) -> Option { + match unit { + Unit::Si => Some(BigUint::from(1000u16) * pow10(30)), + Unit::Iec(_) => Some(BigUint::from(1000u16) * pow1024(10)), + Unit::Auto | Unit::None => None, + } +} + +fn maybe_warn_precision_loss(input: &str) { + const MAX_UNSCALED_DIGITS: usize = 18; + let trimmed = trim_trailing_blanks(input); + let decimal_sep = locale_decimal_separator_char(); + let mut grouping_sep = locale_grouping_separator_char(); + if grouping_sep == Some(decimal_sep) { + grouping_sep = None; + } + if let Some(scan) = scan_number_prefix(trimmed, decimal_sep, grouping_sep) { + if scan.digits > MAX_UNSCALED_DIGITS { + uucore::show_error!("large input value '{}': possible precision loss", trimmed); + } + } +} + +fn apply_decimal_separator(num: &str, decimal_sep: char) -> String { + if decimal_sep == '.' { + return num.to_string(); + } + if let Some(pos) = num.find('.') { + let mut out = String::with_capacity(num.len()); + out.push_str(&num[..pos]); + out.push(decimal_sep); + out.push_str(&num[pos + 1..]); + out + } else { + num.to_string() + } +} + +fn apply_grouping(num: &str, grouping_sep: &str, decimal_sep: char) -> String { + let mut parts = num.splitn(2, '.'); + let int_part = parts.next().unwrap_or(""); + let frac_part = parts.next(); + + let (sign, digits) = match int_part.chars().next() { + Some('-') | Some('+') => { + let sign = int_part.chars().next().unwrap(); + (Some(sign), &int_part[1..]) + } + _ => (None, int_part), + }; + + let digits_chars: Vec = digits.chars().collect(); + let len = digits_chars.len(); + let mut out = String::new(); + if let Some(sign) = sign { + out.push(sign); + } + for (i, ch) in digits_chars.iter().enumerate() { + if i > 0 && (len - i) % 3 == 0 { + out.push_str(grouping_sep); + } + out.push(*ch); + } + + if let Some(frac) = frac_part { + out.push(decimal_sep); + out.push_str(frac); + } + + out +} + +fn unit_separator_skip_char(unit_separator: &str, unit_separator_specified: bool) -> Option { + if !unit_separator_specified { + return None; + } + if unit_separator.chars().count() == 1 { + let ch = unit_separator.chars().next().unwrap(); + if ch.is_whitespace() && !ch.is_ascii_whitespace() { + return Some(ch); + } + } + None +} + +#[cfg(test)] fn find_numeric_beginning(s: &str) -> Option<&str> { let mut decimal_point_seen = false; if s.is_empty() { @@ -89,6 +534,7 @@ fn find_numeric_beginning(s: &str) -> Option<&str> { } // finds the valid beginning part of an input string, or None. +#[cfg(test)] fn find_valid_number_with_suffix<'a>(s: &'a str, unit: &Unit) -> Option<&'a str> { let numeric_part = find_numeric_beginning(s)?; @@ -117,75 +563,174 @@ fn find_valid_number_with_suffix<'a>(s: &'a str, unit: &Unit) -> Option<&'a str> } } +#[cfg(test)] fn detailed_error_message(s: &str, unit: &Unit) -> Option { - if s.is_empty() { - return Some(translate!("numfmt-error-invalid-number-empty")); + parse_number_with_suffix(s, unit, "", false).err() +} + +fn parse_number_with_suffix_parts( + s: &str, + unit: &Unit, + unit_separator: &str, + unit_separator_specified: bool, +) -> Result { + let trimmed = trim_trailing_blanks(s); + if trimmed.is_empty() { + return Err(translate!("numfmt-error-invalid-number-empty")); } - let valid_part = find_valid_number_with_suffix(s, unit) - .ok_or(translate!("numfmt-error-invalid-number", "input" => s.quote())) - .ok()?; + let decimal_sep = locale_decimal_separator_char(); + let mut grouping_sep = locale_grouping_separator_char(); + if grouping_sep == Some(decimal_sep) { + grouping_sep = None; + } - if valid_part != s && valid_part.parse::().is_ok() { - return match s.chars().nth(valid_part.len()) { - Some(v) if RawSuffix::try_from(&v).is_ok() => Some( - translate!("numfmt-error-rejecting-suffix", "number" => valid_part, "suffix" => s[valid_part.len()..]), - ), + let scan = scan_number_prefix(trimmed, decimal_sep, grouping_sep); + let Some(scan) = scan else { + if decimal_separator_count(trimmed, decimal_sep) >= 2 { + return Err(translate!( + "numfmt-error-invalid-suffix", + "input" => trimmed.quote() + )); + } + return Err(translate!( + "numfmt-error-invalid-number", + "input" => trimmed.quote() + )); + }; - _ => Some(translate!("numfmt-error-invalid-suffix", "input" => s.quote())), - }; + if total_significant_digits(&scan) > MAX_CONVERSION_DIGITS { + return Err(translate!( + "numfmt-error-value-too-large-to-be-converted", + "input" => trimmed.quote() + )); } - if valid_part != s && valid_part.parse::().is_err() { - return Some( - translate!("numfmt-error-invalid-specific-suffix", "input" => s.quote(), "suffix" => s[valid_part.len()..].quote()), - ); + let number = scan + .normalized + .parse::() + .map_err(|_| translate!("numfmt-error-invalid-number", "input" => trimmed.quote()))?; + + let mut rest = &trimmed[scan.end..]; + + if !rest.is_empty() { + if !unit_separator.is_empty() && rest.starts_with(unit_separator) { + rest = &rest[unit_separator.len()..]; + } else if unit_separator.is_empty() && !unit_separator_specified { + if let Some(first) = rest.chars().next() { + if is_blank_for_suffix(first) { + let mut blank_count = 0usize; + let mut blank_bytes = 0usize; + for (idx, ch) in rest.char_indices() { + if is_blank_for_suffix(ch) { + blank_count += 1; + blank_bytes = idx + ch.len_utf8(); + } else { + break; + } + } + let rest_after = &rest[blank_bytes..]; + if rest_after.is_empty() { + rest = ""; + } else if blank_count == 1 { + rest = rest_after; + } else { + return Err(translate!( + "numfmt-error-invalid-suffix", + "input" => trimmed.quote() + )); + } + } + } + } } - None -} -fn parse_suffix(s: &str, unit: &Unit) -> Result<(f64, Option)> { - if s.is_empty() { - return Err(translate!("numfmt-error-invalid-number-empty")); + if rest.is_empty() { + return Ok(ParsedNumber { + value: number, + suffix: None, + scan, + }); } - let with_i = s.ends_with('i'); - if with_i && ![Unit::Auto, Unit::Iec(true)].contains(unit) { - return Err(translate!("numfmt-error-invalid-suffix", "input" => s.quote())); - } - let mut iter = s.chars(); - if with_i { - iter.next_back(); - } - let suffix = match iter.next_back() { - Some('K') => Some((RawSuffix::K, with_i)), - Some('k') => Some((RawSuffix::K, with_i)), - Some('M') => Some((RawSuffix::M, with_i)), - Some('G') => Some((RawSuffix::G, with_i)), - Some('T') => Some((RawSuffix::T, with_i)), - Some('P') => Some((RawSuffix::P, with_i)), - Some('E') => Some((RawSuffix::E, with_i)), - Some('Z') => Some((RawSuffix::Z, with_i)), - Some('Y') => Some((RawSuffix::Y, with_i)), - Some('R') => Some((RawSuffix::R, with_i)), - Some('Q') => Some((RawSuffix::Q, with_i)), - Some('0'..='9') if !with_i => None, - _ => { - return Err(translate!("numfmt-error-invalid-number", "input" => s.quote())); - } + let mut chars = rest.chars(); + let suffix_char = chars.next().unwrap(); + let Ok(raw_suffix) = RawSuffix::try_from(&suffix_char) else { + return Err(translate!( + "numfmt-error-invalid-suffix", + "input" => trimmed.quote() + )); }; - let suffix_len = match suffix { - None => 0, - Some((_, false)) => 1, - Some((_, true)) => 2, - }; + let mut with_i = false; + let mut remainder = chars.as_str(); + if remainder.starts_with('i') { + if [Unit::Auto, Unit::Iec(true)].contains(unit) { + with_i = true; + remainder = &remainder[1..]; + } else { + let suffix_detail = remainder.trim_start_matches(is_blank_for_suffix); + return Err(translate!( + "numfmt-error-invalid-specific-suffix", + "input" => trimmed.quote(), + "suffix" => suffix_detail.quote() + )); + } + } - let number = s[..s.len() - suffix_len] - .parse::() - .map_err(|_| translate!("numfmt-error-invalid-number", "input" => s.quote()))?; + if matches!(unit, Unit::Iec(true)) && !with_i { + return Err(translate!( + "numfmt-error-missing-i-suffix", + "number" => number, + "suffix" => format!("{raw_suffix:?}") + )); + } - Ok((number, suffix)) + if !remainder.is_empty() { + let suffix_detail = remainder.trim_start_matches(is_blank_for_suffix); + if suffix_detail.is_empty() { + return Err(translate!( + "numfmt-error-invalid-suffix", + "input" => trimmed.quote() + )); + } + return Err(translate!( + "numfmt-error-invalid-specific-suffix", + "input" => trimmed.quote(), + "suffix" => suffix_detail.quote() + )); + } + + if matches!(unit, Unit::None) { + let suffix_str = format!("{raw_suffix:?}{}", if with_i { "i" } else { "" }); + return Err(translate!( + "numfmt-error-rejecting-suffix", + "number" => number, + "suffix" => suffix_str + )); + } + + Ok(ParsedNumber { + value: number, + suffix: Some((raw_suffix, with_i)), + scan, + }) +} + +#[cfg(test)] +fn parse_number_with_suffix( + s: &str, + unit: &Unit, + unit_separator: &str, + unit_separator_specified: bool, +) -> Result<(f64, Option)> { + let parsed = parse_number_with_suffix_parts(s, unit, unit_separator, unit_separator_specified)?; + Ok((parsed.value, parsed.suffix)) +} + +#[cfg(test)] +fn parse_suffix(s: &str, unit: &Unit) -> Result<(f64, Option)> { + parse_number_with_suffix(s, unit, "", false) } /// Returns the implicit precision of a number, which is the count of digits after the dot. For @@ -238,9 +783,7 @@ fn remove_suffix(i: f64, s: Option, u: &Unit) -> Result { } } -fn transform_from(s: &str, opts: &TransformOptions) -> Result { - let (i, suffix) = parse_suffix(s, &opts.from) - .map_err(|original| detailed_error_message(s, &opts.from).unwrap_or(original))?; +fn transform_from_parsed(i: f64, suffix: Option, opts: &TransformOptions) -> Result { let i = i * (opts.from_unit as f64); remove_suffix(i, suffix, &opts.from).map(|n| { @@ -375,6 +918,43 @@ fn transform_to( }) } +fn validate_unscaled_output_size(value: &DecimalValue, precision: usize) -> Result<()> { + let int_len = value.integer_part_len(); + if int_len + precision > MAX_UNSCALED_OUTPUT_DIGITS { + let sci = value.scientific_notation(); + if precision > 0 { + let val = format!("{sci}/{precision}"); + return Err(translate!( + "numfmt-error-value-precision-too-large-to-be-printed", + "value" => val.quote() + )); + } + return Err(translate!( + "numfmt-error-value-too-large-to-be-printed", + "value" => sci.quote() + )); + } + Ok(()) +} + +fn validate_scaled_output_size(value: &DecimalValue, unit: &Unit) -> Result<()> { + let Some(max_value) = max_scaled_value(unit) else { + return Ok(()); + }; + if value.is_zero { + return Ok(()); + } + let threshold = max_value * pow10(value.scale); + if value.digits >= threshold { + let sci = value.scientific_notation(); + return Err(translate!( + "numfmt-error-value-too-large-to-be-printed-max", + "value" => sci.quote() + )); + } + Ok(()) +} + fn format_string( source: &str, options: &NumfmtOptions, @@ -386,6 +966,24 @@ fn format_string( None => source, }; + if options.debug { + maybe_warn_precision_loss(source_without_suffix); + } + + let parsed = parse_number_with_suffix_parts( + source_without_suffix, + &options.transform.from, + &options.unit_separator, + options.unit_separator_specified, + )?; + let decimal = DecimalValue::from_scan(&parsed.scan); + let mut scaled = decimal.clone(); + let multiplier = suffix_multiplier(parsed.suffix, &options.transform.from)?; + scaled.apply_multiplier(&multiplier); + if options.transform.from_unit != 1 { + scaled.apply_multiplier(&BigUint::from(options.transform.from_unit)); + } + let precision = if let Some(p) = options.format.precision { p } else if options.transform.from == Unit::None && options.transform.to == Unit::None { @@ -394,13 +992,41 @@ fn format_string( 0 }; - let number = transform_to( - transform_from(source_without_suffix, &options.transform)?, - &options.transform, - options.round, - precision, - &options.unit_separator, - )?; + if options.transform.to == Unit::None { + validate_unscaled_output_size(&scaled, precision)?; + } else { + validate_scaled_output_size(&scaled, &options.transform.to)?; + } + + let use_raw = options.transform.from == Unit::None + && options.transform.to == Unit::None + && options.transform.from_unit == 1 + && options.transform.to_unit == 1 + && options.format == FormatOptions::default(); + + let mut number = if use_raw { + decimal.normalized_string() + } else { + transform_to( + transform_from_parsed(parsed.value, parsed.suffix, &options.transform)?, + &options.transform, + options.round, + precision, + &options.unit_separator, + )? + }; + + let decimal_sep = locale_decimal_separator_char(); + let grouping_requested = options.grouping || options.format.grouping; + if grouping_requested && options.transform.to == Unit::None { + if let Some(grouping_sep) = locale_grouping_separator_string() { + number = apply_grouping(&number, &grouping_sep, decimal_sep); + } else { + number = apply_decimal_separator(&number, decimal_sep); + } + } else { + number = apply_decimal_separator(&number, decimal_sep); + } // bring back the suffix before applying padding let number_with_suffix = match &options.suffix { @@ -416,16 +1042,16 @@ fn format_string( let padded_number = match padding { 0 => number_with_suffix, p if p > 0 && options.format.zero_padding => { - let zero_padded = format!("{number_with_suffix:0>padding$}", padding = p as usize); + let zero_padded = pad_string(&number_with_suffix, p as usize, '0', false); match implicit_padding.unwrap_or(options.padding) { 0 => zero_padded, - p if p > 0 => format!("{zero_padded:>padding$}", padding = p as usize), - p => format!("{zero_padded: 0 => pad_string(&zero_padded, p as usize, ' ', false), + p => pad_string(&zero_padded, p.unsigned_abs(), ' ', true), } } - p if p > 0 => format!("{number_with_suffix:>padding$}", padding = p as usize), - p => format!("{number_with_suffix: 0 => pad_string(&number_with_suffix, p as usize, ' ', false), + p => pad_string(&number_with_suffix, p.unsigned_abs(), ' ', true), }; Ok(format!( @@ -434,6 +1060,23 @@ fn format_string( )) } +fn pad_string(input: &str, width: usize, pad_char: char, align_left: bool) -> String { + let len = input.chars().count(); + if len >= width { + return input.to_string(); + } + let pad_len = width - len; + let mut out = String::with_capacity(input.len() + pad_len * pad_char.len_utf8()); + if align_left { + out.push_str(input); + out.extend(std::iter::repeat_n(pad_char, pad_len)); + } else { + out.extend(std::iter::repeat_n(pad_char, pad_len)); + out.push_str(input); + } + out +} + fn split_bytes<'a>(input: &'a [u8], delim: &'a [u8]) -> impl Iterator { let mut remainder = Some(input); std::iter::from_fn(move || { @@ -448,7 +1091,11 @@ fn split_bytes<'a>(input: &'a [u8], delim: &'a [u8]) -> impl Iterator Result<()> { +pub fn format_and_print_delimited( + input: &[u8], + options: &NumfmtOptions, + append_eol: bool, +) -> Result<()> { let delimiter = options.delimiter.as_ref().unwrap(); let mut output: Vec = Vec::new(); let eol = if options.zero_terminated { @@ -457,72 +1104,139 @@ pub fn format_and_print_delimited(input: &[u8], options: &NumfmtOptions) -> Resu b'\n' }; - for (n, field) in (1..).zip(split_bytes(input, delimiter)) { - let field_selected = uucore::ranges::contain(&options.fields, n); - - // add delimiter before second and subsequent fields - if n > 1 { - output.extend_from_slice(delimiter); - } - + if delimiter.is_empty() { + let field_selected = uucore::ranges::contain(&options.fields, 1); if field_selected { - // Field must be valid UTF-8 for numeric conversion - let field_str = std::str::from_utf8(field) - .map_err(|_| translate!("numfmt-error-invalid-number", "input" => String::from_utf8_lossy(field).into_owned().quote()))? + let field_str = std::str::from_utf8(input) + .map_err(|_| { + translate!( + "numfmt-error-invalid-number", + "input" => String::from_utf8_lossy(input).into_owned().quote() + ) + })? .trim_start(); - let formatted = format_string(field_str, options, None)?; - output.extend_from_slice(formatted.as_bytes()); + match format_string(field_str, options, None) { + Ok(formatted) => output.extend_from_slice(formatted.as_bytes()), + Err(err) => { + if options.invalid == crate::options::InvalidModes::Abort { + std::io::stdout() + .write_all(&output) + .map_err(|e| e.to_string())?; + } + return Err(err); + } + } } else { - // add unselected field without conversion - output.extend_from_slice(field); + output.extend_from_slice(input); + } + } else { + for (n, field) in (1..).zip(split_bytes(input, delimiter)) { + let field_selected = uucore::ranges::contain(&options.fields, n); + + // add delimiter before second and subsequent fields + if n > 1 { + output.extend_from_slice(delimiter); + } + + if field_selected { + // Field must be valid UTF-8 for numeric conversion + let field_str = std::str::from_utf8(field) + .map_err(|_| { + translate!( + "numfmt-error-invalid-number", + "input" => String::from_utf8_lossy(field).into_owned().quote() + ) + })? + .trim_start(); + match format_string(field_str, options, None) { + Ok(formatted) => output.extend_from_slice(formatted.as_bytes()), + Err(err) => { + if options.invalid == crate::options::InvalidModes::Abort { + std::io::stdout() + .write_all(&output) + .map_err(|e| e.to_string())?; + } + return Err(err); + } + } + } else { + // add unselected field without conversion + output.extend_from_slice(field); + } } } - output.push(eol); + if append_eol { + output.push(eol); + } std::io::Write::write_all(&mut std::io::stdout(), &output).map_err(|e| e.to_string())?; Ok(()) } -pub fn format_and_print_whitespace(s: &str, options: &NumfmtOptions) -> Result<()> { + +pub fn format_and_print_whitespace( + s: &str, + options: &NumfmtOptions, + append_eol: bool, +) -> Result<()> { let mut output = String::new(); + let skip_whitespace = + unit_separator_skip_char(&options.unit_separator, options.unit_separator_specified); - for (n, (prefix, field)) in (1..).zip(WhitespaceSplitter { s: Some(s) }) { + for (n, (prefix, field)) in (1..).zip(WhitespaceSplitter { + s: Some(s), + skip_whitespace, + }) { let field_selected = uucore::ranges::contain(&options.fields, n); + let prefix_len = prefix.chars().count(); + let field_len = field.chars().count(); if field_selected { - let empty_prefix = prefix.is_empty(); + let empty_prefix = prefix_len == 0; - // add delimiter before second and subsequent fields - let prefix = if n > 1 { + let prefix_for_padding_len = if n > 1 { output.push(' '); - &prefix[1..] + prefix_len.saturating_sub(1) } else { - prefix + prefix_len }; let implicit_padding = if !empty_prefix && options.padding == 0 { - Some((prefix.len() + field.len()) as isize) + Some((prefix_for_padding_len + field_len) as isize) } else { None }; - output.push_str(&format_string(field, options, implicit_padding)?); + match format_string(field, options, implicit_padding) { + Ok(formatted) => output.push_str(&formatted), + Err(err) => { + if options.invalid == crate::options::InvalidModes::Abort { + std::io::stdout() + .write_all(output.as_bytes()) + .map_err(|e| e.to_string())?; + } + return Err(err); + } + } } else { // the -z option converts an initial \n into a space - let prefix = if options.zero_terminated && prefix.starts_with('\n') { + if options.zero_terminated && prefix.starts_with('\n') { output.push(' '); - &prefix[1..] - } else { - prefix - }; + if prefix_len > 1 { + output.push_str(&" ".repeat(prefix_len - 1)); + } + } else if prefix_len > 0 { + output.push_str(&" ".repeat(prefix_len)); + } // add unselected field without conversion - output.push_str(prefix); output.push_str(field); } } - let eol = if options.zero_terminated { '\0' } else { '\n' }; - output.push(eol); + if append_eol { + let eol = if options.zero_terminated { '\0' } else { '\n' }; + output.push(eol); + } print!("{output}"); Ok(()) @@ -768,4 +1482,30 @@ mod tests { assert_eq!(raw_suffix as i32, RawSuffix::Q as i32); assert_eq!(value, 5.0); } + + #[test] + fn test_whitespace_splitter_nbsp_not_separator() { + let s = format!("1\u{00A0}K 2"); + let mut fields = WhitespaceSplitter { + s: Some(&s), + skip_whitespace: None, + }; + + assert_eq!(Some(("", "1\u{00A0}K")), fields.next()); + assert_eq!(Some((" ", "2")), fields.next()); + assert_eq!(None, fields.next()); + } + + #[test] + fn test_whitespace_splitter_em_space_is_separator() { + let s = format!("1\u{2003}2"); + let mut fields = WhitespaceSplitter { + s: Some(&s), + skip_whitespace: None, + }; + + assert_eq!(Some(("", "1")), fields.next()); + assert_eq!(Some(("\u{2003}", "2")), fields.next()); + assert_eq!(None, fields.next()); + } } diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index 80528dbd4c5..eb28a12d96e 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -7,11 +7,14 @@ use crate::errors::*; use crate::format::{format_and_print_delimited, format_and_print_whitespace}; use crate::options::*; use crate::units::{Result, Unit}; -use clap::{Arg, ArgAction, ArgMatches, Command, builder::ValueParser, parser::ValueSource}; +use clap::{ + Arg, ArgAction, ArgMatches, Command, builder::ValueParser, error::ErrorKind, + parser::ValueSource, +}; use std::ffi::OsString; -use std::io::{BufRead, Error, Write}; -use std::result::Result as StdResult; +use std::io::{BufRead, Write}; use std::str::FromStr; +use std::sync::atomic::{AtomicBool, Ordering}; use units::{IEC_BASES, SI_BASES}; use uucore::display::Quotable; @@ -28,9 +31,11 @@ pub mod format; pub mod options; mod units; +static HAD_INVALID: AtomicBool = AtomicBool::new(false); + fn handle_args<'a>(args: impl Iterator, options: &NumfmtOptions) -> UResult<()> { for l in args { - format_and_handle_validation(l, options)?; + format_and_handle_validation(l, options, true)?; } Ok(()) } @@ -40,29 +45,42 @@ where R: BufRead, { let terminator = if options.zero_terminated { 0u8 } else { b'\n' }; - handle_buffer_iterator(input.split(terminator), options, terminator) -} + let mut reader = input; + let mut buf = Vec::new(); + let mut idx = 0usize; + + loop { + buf.clear(); + let read = reader + .read_until(terminator, &mut buf) + .map_err(|e| NumfmtError::IoError(e.to_string()))?; + if read == 0 { + break; + } + let has_terminator = buf.last() == Some(&terminator); + if has_terminator { + buf.pop(); + } -fn handle_buffer_iterator( - iter: impl Iterator, Error>>, - options: &NumfmtOptions, - terminator: u8, -) -> UResult<()> { - for (idx, line_result) in iter.enumerate() { - match line_result { - Ok(line) if idx < options.header => { - std::io::stdout().write_all(&line)?; + if idx < options.header { + std::io::stdout().write_all(&buf)?; + if has_terminator { std::io::stdout().write_all(&[terminator])?; - Ok(()) } - Ok(line) => format_and_handle_validation(&line, options), - Err(err) => return Err(Box::new(NumfmtError::IoError(err.to_string()))), - }?; + } else { + format_and_handle_validation(&buf, options, has_terminator)?; + } + idx += 1; } + Ok(()) } -fn format_and_handle_validation(input_line: &[u8], options: &NumfmtOptions) -> UResult<()> { +fn format_and_handle_validation( + input_line: &[u8], + options: &NumfmtOptions, + append_eol: bool, +) -> UResult<()> { let eol = if options.zero_terminated { b'\0' } else { @@ -70,11 +88,11 @@ fn format_and_handle_validation(input_line: &[u8], options: &NumfmtOptions) -> U }; let handled_line = if options.delimiter.is_some() { - format_and_print_delimited(input_line, options) + format_and_print_delimited(input_line, options, append_eol) } else { // Whitespace mode requires valid UTF-8 match std::str::from_utf8(input_line) { - Ok(s) => format_and_print_whitespace(s, options), + Ok(s) => format_and_print_whitespace(s, options, append_eol), Err(_) => Err(translate!("numfmt-error-invalid-input")), } }; @@ -85,6 +103,7 @@ fn format_and_handle_validation(input_line: &[u8], options: &NumfmtOptions) -> U return Err(Box::new(NumfmtError::FormattingError(error_message))); } InvalidModes::Fail => { + HAD_INVALID.store(true, Ordering::Relaxed); show!(NumfmtError::FormattingError(error_message)); } InvalidModes::Warn => { @@ -93,7 +112,9 @@ fn format_and_handle_validation(input_line: &[u8], options: &NumfmtOptions) -> U InvalidModes::Ignore => {} } std::io::stdout().write_all(input_line)?; - std::io::stdout().write_all(&[eol])?; + if append_eol { + std::io::stdout().write_all(&[eol])?; + } } Ok(()) @@ -174,6 +195,133 @@ fn parse_delimiter(arg: &OsString) -> Result> { } } +#[derive(Debug)] +enum FieldParseError { + InvalidValue(String), + InvalidRange, + NumberedFromOne, + DecreasingRange, + TooLarge(String), +} + +fn format_field_parse_error(err: FieldParseError, try_help: &str) -> String { + match err { + FieldParseError::InvalidValue(value) => { + format!("invalid field value '{value}'\n{try_help}") + } + FieldParseError::InvalidRange => { + format!("invalid field range\n{try_help}") + } + FieldParseError::NumberedFromOne => { + format!("fields are numbered from 1\n{try_help}") + } + FieldParseError::DecreasingRange => { + format!("invalid decreasing range\n{try_help}") + } + FieldParseError::TooLarge(value) => { + format!("field number '{value}' is too large\n{try_help}") + } + } +} + +fn parse_field_number(value: &str) -> std::result::Result { + if value.is_empty() { + return Err(FieldParseError::InvalidValue(value.to_string())); + } + if !value.chars().all(|c| c.is_ascii_digit()) { + return Err(FieldParseError::InvalidValue(value.to_string())); + } + let parsed: u128 = value.parse().unwrap_or(u128::MAX); + if parsed == 0 { + return Err(FieldParseError::NumberedFromOne); + } + let max_allowed = (usize::MAX - 1) as u128; + if parsed > max_allowed { + return Err(FieldParseError::TooLarge(value.to_string())); + } + Ok(parsed as usize) +} + +fn parse_field_item(item: &str) -> std::result::Result { + let dash_count = item.matches('-').count(); + if dash_count > 1 { + return Err(FieldParseError::InvalidRange); + } + if dash_count == 0 { + let n = parse_field_number(item)?; + return Ok(Range { low: n, high: n }); + } + + let (low_raw, high_raw) = item.split_once('-').unwrap(); + match (low_raw, high_raw) { + ("", "") => Err(FieldParseError::InvalidRange), + ("", high) => { + let high = parse_field_number(high)?; + Ok(Range { low: 1, high }) + } + (low, "") => { + let low = parse_field_number(low)?; + Ok(Range { + low, + high: usize::MAX - 1, + }) + } + (low, high) => { + let low = parse_field_number(low)?; + let high = parse_field_number(high)?; + if low > high { + Err(FieldParseError::DecreasingRange) + } else { + Ok(Range { low, high }) + } + } + } +} + +fn parse_field_list(list: &str, try_help: &str) -> Result> { + // a lone "-" means "all fields", even as part of a list of fields + if list.split(&[',', ' ']).any(|x| x == "-") { + return Ok(vec![Range { + low: 1, + high: usize::MAX, + }]); + } + + let mut ranges = Vec::new(); + for item in list.split(&[',', ' ']) { + if item.is_empty() { + continue; + } + match parse_field_item(item) { + Ok(range) => ranges.push(range), + Err(err) => return Err(format_field_parse_error(err, try_help)), + } + } + + Ok(merge_ranges(ranges)) +} + +fn merge_ranges(mut ranges: Vec) -> Vec { + if ranges.is_empty() { + return ranges; + } + ranges.sort(); + + let mut merged = Vec::with_capacity(ranges.len()); + let mut iter = ranges.into_iter(); + let mut current = iter.next().unwrap(); + for range in iter { + if range.low <= current.high { + current.high = current.high.max(range.high); + } else { + merged.push(current); + current = range; + } + } + merged.push(current); + merged +} + fn parse_options(args: &ArgMatches) -> Result { let from = parse_unit(args.get_one::(FROM).unwrap())?; let to = parse_unit(args.get_one::(TO).unwrap())?; @@ -214,23 +362,26 @@ fn parse_options(args: &ArgMatches) -> Result { Ok(0) }?; - let fields = args.get_one::(FIELD).unwrap().as_str(); - // a lone "-" means "all fields", even as part of a list of fields - let fields = if fields.split(&[',', ' ']).any(|x| x == "-") { - vec![Range { - low: 1, - high: usize::MAX, - }] - } else { - Range::from_list(fields)? - }; + let try_help = format!("Try '{} --help' for more information.", uucore::util_name()); + let field_value = args + .get_one::(FIELD) + .map_or(FIELD_DEFAULT, String::as_str); + let fields = parse_field_list(field_value, &try_help)?; let format = match args.get_one::(FORMAT) { Some(s) => s.parse()?, None => FormatOptions::default(), }; - if format.grouping && to != Unit::None { + let grouping = args.get_flag(GROUPING); + let format_specified = args.value_source(FORMAT) == Some(ValueSource::CommandLine); + if grouping && format_specified { + return Err(translate!( + "numfmt-error-grouping-cannot-be-combined-with-format" + )); + } + + if (grouping || format.grouping) && to != Unit::None { return Err(translate!( "numfmt-error-grouping-cannot-be-combined-with-to" )); @@ -253,6 +404,8 @@ fn parse_options(args: &ArgMatches) -> Result { let suffix = args.get_one::(SUFFIX).cloned(); + let unit_separator_specified = + args.value_source(UNIT_SEPARATOR) == Some(ValueSource::CommandLine); let unit_separator = args .get_one::(UNIT_SEPARATOR) .cloned() @@ -273,10 +426,13 @@ fn parse_options(args: &ArgMatches) -> Result { round, suffix, unit_separator, + unit_separator_specified, format, + grouping, invalid, zero_terminated, debug, + dev_debug: false, }) } @@ -297,9 +453,33 @@ fn print_debug_warnings(options: &NumfmtOptions, matches: &ArgMatches) { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?; + let mut args: Vec = args.collect(); + let mut dev_debug = false; + for arg in &mut args { + if arg == "---debug" { + *arg = OsString::from("--debug"); + dev_debug = true; + } + } - let options = parse_options(&matches).map_err(NumfmtError::IllegalArgument)?; + let matches = match uu_app().try_get_matches_from(&args) { + Ok(matches) => matches, + Err(err) => { + if err.kind() == ErrorKind::UnknownArgument { + let try_help = + format!("Try '{} --help' for more information.", uucore::util_name()); + let message = format!("unrecognized option\n{try_help}"); + return Err(NumfmtError::IllegalArgument(message).into()); + } + return Err(err.into()); + } + }; + + let mut options = parse_options(&matches).map_err(NumfmtError::IllegalArgument)?; + if dev_debug { + options.dev_debug = true; + options.debug = true; + } if options.debug { print_debug_warnings(&options, &matches); @@ -307,6 +487,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let result = match matches.get_many::(NUMBER) { Some(values) => { + if options.debug && options.header > 0 { + show_error!("--header ignored with command-line input"); + } let byte_args: Vec<&[u8]> = values .map(|s| os_str_as_bytes(s).map_err(|e| e.to_string())) .collect::, _>>() @@ -320,6 +503,21 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } }; + if options.dev_debug { + eprintln!("MAX_UNSCALED_DIGITS: 18"); + } + + if options.debug && !has_conversion_option(&options) { + show_error!("no conversion option specified"); + } + if options.debug && grouping_requested(&options) && !locale_has_grouping() { + show_error!("grouping has no effect in this locale"); + } + if options.debug && options.invalid == InvalidModes::Fail && HAD_INVALID.load(Ordering::Relaxed) + { + show_error!("failed to convert some of the input numbers"); + } + match result { Err(e) => { std::io::stdout().flush().expect("error flushing stdout"); @@ -329,6 +527,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } } +fn grouping_requested(options: &NumfmtOptions) -> bool { + options.grouping || options.format.grouping +} + +fn has_conversion_option(options: &NumfmtOptions) -> bool { + options.transform.from != Unit::None + || options.transform.to != Unit::None + || options.transform.from_unit != 1 + || options.transform.to_unit != 1 + || options.padding != 0 + || options.grouping + || options.format != FormatOptions::default() + || options.suffix.is_some() + || !options.unit_separator.is_empty() +} + +fn locale_has_grouping() -> bool { + crate::format::locale_grouping_separator_string().is_some() +} + pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(uucore::crate_version!()) @@ -358,7 +576,8 @@ pub fn uu_app() -> Command { .help(translate!("numfmt-help-field")) .value_name("FIELDS") .allow_hyphen_values(true) - .default_value(FIELD_DEFAULT), + .num_args(1) + .action(ArgAction::Set), ) .arg( Arg::new(FORMAT) @@ -367,6 +586,12 @@ pub fn uu_app() -> Command { .value_name("FORMAT") .allow_hyphen_values(true), ) + .arg( + Arg::new(GROUPING) + .long(GROUPING) + .help(translate!("numfmt-help-grouping")) + .action(ArgAction::SetTrue), + ) .arg( Arg::new(FROM) .long(FROM) @@ -444,6 +669,12 @@ pub fn uu_app() -> Command { .value_parser(["abort", "fail", "warn", "ignore"]) .value_name("INVALID"), ) + .arg( + Arg::new(DEBUG) + .long(DEBUG) + .help(translate!("numfmt-help-debug")) + .action(ArgAction::SetTrue), + ) .arg( Arg::new(ZERO_TERMINATED) .long(ZERO_TERMINATED) @@ -491,10 +722,13 @@ mod tests { round: RoundMethod::Nearest, suffix: None, unit_separator: String::new(), + unit_separator_specified: false, format: FormatOptions::default(), + grouping: false, invalid: InvalidModes::Abort, zero_terminated: false, debug: false, + dev_debug: false, } } diff --git a/src/uu/numfmt/src/options.rs b/src/uu/numfmt/src/options.rs index fedcd19c9e3..0a716f4d8c2 100644 --- a/src/uu/numfmt/src/options.rs +++ b/src/uu/numfmt/src/options.rs @@ -19,6 +19,7 @@ pub const FROM_UNIT: &str = "from-unit"; pub const FROM_UNIT_DEFAULT: &str = "1"; pub const HEADER: &str = "header"; pub const HEADER_DEFAULT: &str = "1"; +pub const GROUPING: &str = "grouping"; pub const INVALID: &str = "invalid"; pub const NUMBER: &str = "NUMBER"; pub const PADDING: &str = "padding"; @@ -55,10 +56,13 @@ pub struct NumfmtOptions { pub round: RoundMethod, pub suffix: Option, pub unit_separator: String, + pub unit_separator_specified: bool, pub format: FormatOptions, + pub grouping: bool, pub invalid: InvalidModes, pub zero_terminated: bool, pub debug: bool, + pub dev_debug: bool, } #[derive(Clone, Copy)]