Skip to content
29 changes: 1 addition & 28 deletions src/uu/dd/src/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,9 @@

//! Functions for formatting a number as a magnitude and a unit suffix.

/// The first ten powers of 1024.
const IEC_BASES: [u128; 10] = [
1,
1_024,
1_048_576,
1_073_741_824,
1_099_511_627_776,
1_125_899_906_842_624,
1_152_921_504_606_846_976,
1_180_591_620_717_411_303_424,
1_208_925_819_614_629_174_706_176,
1_237_940_039_285_380_274_899_124_224,
];
use uucore::parser::parse_size::{IEC_BASES, SI_BASES};

const IEC_SUFFIXES: [&str; 9] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];

/// The first ten powers of 1000.
const SI_BASES: [u128; 10] = [
1,
1_000,
1_000_000,
1_000_000_000,
1_000_000_000_000,
1_000_000_000_000_000,
1_000_000_000_000_000_000,
1_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000_000,
];

const SI_SUFFIXES: [&str; 9] = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];

/// A `SuffixType` determines whether the suffixes are 1000 or 1024 based.
Expand Down
36 changes: 5 additions & 31 deletions src/uu/df/src/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,11 @@ use std::{env, fmt};

use uucore::{
display::Quotable,
parser::parse_size::{ParseSizeError, parse_size_non_zero_u64, parse_size_u64},
parser::parse_size::{
IEC_BASES, ParseSizeError, SI_BASES, parse_size_non_zero_u64, parse_size_u64,
},
};

/// The first ten powers of 1024.
const IEC_BASES: [u128; 10] = [
1,
1_024,
1_048_576,
1_073_741_824,
1_099_511_627_776,
1_125_899_906_842_624,
1_152_921_504_606_846_976,
1_180_591_620_717_411_303_424,
1_208_925_819_614_629_174_706_176,
1_237_940_039_285_380_274_899_124_224,
];

/// The first ten powers of 1000.
const SI_BASES: [u128; 10] = [
1,
1_000,
1_000_000,
1_000_000_000,
1_000_000_000_000,
1_000_000_000_000_000,
1_000_000_000_000_000_000,
1_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000_000,
];

/// A `SuffixType` determines whether the suffixes are 1000 or 1024 based, and whether they are
/// intended for `HumanReadable` mode or not.
#[derive(Clone, Copy)]
Expand All @@ -50,8 +24,8 @@ pub(crate) enum SuffixType {
}

impl SuffixType {
/// The first ten powers of 1024 and 1000, respectively.
fn bases(self) -> [u128; 10] {
/// The first eleven powers of 1024 and 1000, respectively.
fn bases(self) -> [u128; 11] {
match self {
Self::Iec | Self::HumanReadable(HumanReadable::Binary) => IEC_BASES,
Self::Si | Self::HumanReadable(HumanReadable::Decimal) => SI_BASES,
Expand Down
87 changes: 30 additions & 57 deletions src/uu/numfmt/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ use uucore::i18n::decimal::locale_grouping_separator;
use uucore::translate;

use crate::options::{NumfmtOptions, RoundMethod, TransformOptions};
use crate::units::{DisplayableSuffix, IEC_BASES, RawSuffix, Result, SI_BASES, Suffix, Unit};
use crate::units::{
DisplayableSuffix, RawSuffix, Result, Suffix, Unit, iec_bases_f64, si_bases_f64,
};

fn find_numeric_beginning(s: &str) -> Option<&str> {
let mut decimal_point_seen = false;
Expand Down Expand Up @@ -173,29 +175,17 @@ fn parse_suffix(
if with_i {
iter.next_back();
}
let suffix = match iter.next_back() {
Some('K') => Some((RawSuffix::K, with_i)),
Some('k') => Some((RawSuffix::K, with_i)),
Some('M') => Some((RawSuffix::M, with_i)),
Some('G') => Some((RawSuffix::G, with_i)),
Some('T') => Some((RawSuffix::T, with_i)),
Some('P') => Some((RawSuffix::P, with_i)),
Some('E') => Some((RawSuffix::E, with_i)),
Some('Z') => Some((RawSuffix::Z, with_i)),
Some('Y') => Some((RawSuffix::Y, with_i)),
Some('R') => Some((RawSuffix::R, with_i)),
Some('Q') => Some((RawSuffix::Q, with_i)),
Some('0'..='9') if !with_i => None,
_ => {
return Err(translate!("numfmt-error-invalid-number", "input" => s.quote()));
}
};
let last = iter.next_back();
let suffix = last
.and_then(|c| RawSuffix::try_from(&c).ok())
.map(|raw| (raw, with_i));
match (suffix, last) {
(Some(_), _) => {}
(None, Some(c)) if c.is_ascii_digit() && !with_i => {}
_ => return Err(translate!("numfmt-error-invalid-number", "input" => s.quote())),
}

let suffix_len = match suffix {
None => 0,
Some((_, false)) => 1,
Some((_, true)) => 2,
};
let suffix_len = suffix.map_or(0, |(_, with_i)| 1 + usize::from(with_i));

let number_part = &trimmed[..trimmed.len() - suffix_len];

Expand Down Expand Up @@ -355,40 +345,22 @@ fn parse_implicit_precision(s: &str) -> usize {
}

fn remove_suffix(i: f64, s: Option<Suffix>, u: Unit) -> Result<f64> {
match (s, u) {
(Some((raw_suffix, false)), Unit::Auto | Unit::Si) => match raw_suffix {
RawSuffix::K => Ok(i * 1e3),
RawSuffix::M => Ok(i * 1e6),
RawSuffix::G => Ok(i * 1e9),
RawSuffix::T => Ok(i * 1e12),
RawSuffix::P => Ok(i * 1e15),
RawSuffix::E => Ok(i * 1e18),
RawSuffix::Z => Ok(i * 1e21),
RawSuffix::Y => Ok(i * 1e24),
RawSuffix::R => Ok(i * 1e27),
RawSuffix::Q => Ok(i * 1e30),
},
(Some((raw_suffix, false)), Unit::Iec(false))
| (Some((raw_suffix, true)), Unit::Auto | Unit::Iec(true)) => match raw_suffix {
RawSuffix::K => Ok(i * IEC_BASES[1]),
RawSuffix::M => Ok(i * IEC_BASES[2]),
RawSuffix::G => Ok(i * IEC_BASES[3]),
RawSuffix::T => Ok(i * IEC_BASES[4]),
RawSuffix::P => Ok(i * IEC_BASES[5]),
RawSuffix::E => Ok(i * IEC_BASES[6]),
RawSuffix::Z => Ok(i * IEC_BASES[7]),
RawSuffix::Y => Ok(i * IEC_BASES[8]),
RawSuffix::R => Ok(i * IEC_BASES[9]),
RawSuffix::Q => Ok(i * IEC_BASES[10]),
},
(Some((raw_suffix, false)), Unit::Iec(true)) => Err(
let Some((raw_suffix, with_i)) = s else {
return Ok(i);
};
let idx = raw_suffix.index() + 1;
match (with_i, u) {
(false, Unit::Auto | Unit::Si) => Ok(i * si_bases_f64()[idx]),
(false, Unit::Iec(false)) | (true, Unit::Auto | Unit::Iec(true)) => {
Ok(i * iec_bases_f64()[idx])
}
(false, Unit::Iec(true)) => Err(
translate!("numfmt-error-missing-i-suffix", "number" => i, "suffix" => format!("{raw_suffix:?}")),
),
(Some((raw_suffix, with_i)), Unit::None) => Err(
(_, Unit::None) => Err(
translate!("numfmt-error-rejecting-suffix", "number" => i, "suffix" => format!("{raw_suffix:?}{}", if with_i { "i" } else { "" })),
),
(None, _) => Ok(i),
(_, _) => Err(translate!("numfmt-error-suffix-unsupported-for-unit")),
_ => Err(translate!("numfmt-error-suffix-unsupported-for-unit")),
}
}

Expand Down Expand Up @@ -472,8 +444,8 @@ fn consider_suffix(
let suffixes = [K, M, G, T, P, E, Z, Y, R, Q];

let (bases, with_i) = match u {
Unit::Si => (&SI_BASES, false),
Unit::Iec(with_i) => (&IEC_BASES, with_i),
Unit::Si => (si_bases_f64(), false),
Unit::Iec(with_i) => (iec_bases_f64(), with_i),
Unit::Auto => return Err(translate!("numfmt-error-unit-auto-not-supported-with-to")),
Unit::None => return Ok((n, None)),
};
Expand Down Expand Up @@ -882,13 +854,14 @@ mod tests {
assert!(result.is_ok());
assert_eq!(result.unwrap(), 1e27);

let iec = iec_bases_f64();
let result = remove_suffix(1.0, Some((RawSuffix::Q, true)), Unit::Iec(true));
assert!(result.is_ok());
assert_eq!(result.unwrap(), IEC_BASES[10]);
assert_eq!(result.unwrap(), iec[10]);

let result = remove_suffix(1.0, Some((RawSuffix::R, true)), Unit::Iec(true));
assert!(result.is_ok());
assert_eq!(result.unwrap(), IEC_BASES[9]);
assert_eq!(result.unwrap(), iec[9]);
}

#[test]
Expand Down
30 changes: 13 additions & 17 deletions src/uu/numfmt/src/numfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ use std::ffi::OsString;
use std::io::{BufRead, Write as _, stderr};
use std::str::FromStr;

use units::{IEC_BASES, SI_BASES};
use uucore::display::Quotable;
use uucore::error::UResult;
use uucore::i18n::decimal::locale_grouping_separator;
use uucore::parser::parse_size::{IEC_BASES, SI_BASES};
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
use uucore::ranges::Range;
use uucore::{format_usage, os_str_as_bytes, show, translate};
Expand Down Expand Up @@ -164,15 +164,16 @@ fn parse_unit(s: &str) -> Result<Unit> {
/// Parses a unit size. Suffixes are turned into their integer representations. For example, 'K'
/// will return `Ok(1000)`, and '2K' will return `Ok(2000)`.
fn parse_unit_size(s: &str) -> Result<usize> {
let number: String = s.chars().take_while(char::is_ascii_digit).collect();
let suffix = &s[number.len()..];
let split = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
let (number, suffix) = s.split_at(split);

if number.is_empty() || "0".repeat(number.len()) != number {
// Reject all-zero numeric parts like "0" or "00K".
let all_zero = !number.is_empty() && number.bytes().all(|b| b == b'0');
if !all_zero {
if let Some(multiplier) = parse_unit_size_suffix(suffix) {
if number.is_empty() {
return Ok(multiplier);
}

if let Ok(n) = number.parse::<usize>() {
return Ok(n * multiplier);
}
Expand All @@ -193,20 +194,15 @@ fn parse_unit_size_suffix(s: &str) -> Option<usize> {
return Some(1);
}

let suffix = s.chars().next().unwrap();

if let Some(i) = ['K', 'M', 'G', 'T', 'P', 'E']
let i = ['K', 'M', 'G', 'T', 'P', 'E']
.iter()
.position(|&ch| ch == suffix)
{
return match s.len() {
1 => Some(SI_BASES[i + 1] as usize),
2 if s.ends_with('i') => Some(IEC_BASES[i + 1] as usize),
_ => None,
};
}
.position(|&ch| s.starts_with(ch))?;

None
match s.len() {
1 => Some(SI_BASES[i + 1] as usize),
2 if s.ends_with('i') => Some(IEC_BASES[i + 1] as usize),
_ => None,
}
}

/// Parse delimiter argument, ensuring it's a single character.
Expand Down
65 changes: 33 additions & 32 deletions src/uu/numfmt/src/units.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,18 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::fmt;
use uucore::parser::parse_size::{IEC_BASES, SI_BASES};

pub const SI_BASES: [f64; 11] = [1., 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24, 1e27, 1e30];
/// Converts the shared [`uucore::parser::parse_size::SI_BASES`] table to
/// `f64`, element by element, for numfmt's floating-point math paths.
pub fn si_bases_f64() -> [f64; 11] {
    let powers: [u128; 11] = SI_BASES;
    let to_float = |power: u128| power as f64;
    powers.map(to_float)
}

pub const IEC_BASES: [f64; 11] = [
1.,
1_024.,
1_048_576.,
1_073_741_824.,
1_099_511_627_776.,
1_125_899_906_842_624.,
1_152_921_504_606_846_976.,
1_180_591_620_717_411_303_424.,
1_208_925_819_614_629_174_706_176.,
1_237_940_039_285_380_274_899_124_224.,
1_267_650_600_228_229_401_496_703_205_376.,
];
/// Converts the shared [`uucore::parser::parse_size::IEC_BASES`] table to
/// `f64`, element by element.
pub fn iec_bases_f64() -> [f64; 11] {
    let powers: [u128; 11] = IEC_BASES;
    let to_float = |power: u128| power as f64;
    powers.map(to_float)
}

pub type WithI = bool;

Expand All @@ -33,8 +29,9 @@ pub enum Unit {
pub type Result<T> = std::result::Result<T, String>;

#[derive(Clone, Copy, Debug)]
#[repr(usize)]
pub enum RawSuffix {
K,
K = 0,
M,
G,
T,
Expand All @@ -46,6 +43,15 @@ pub enum RawSuffix {
Q,
}

impl RawSuffix {
/// Index of this suffix in the base arrays, minus one.
/// `K` is 0, `M` is 1, ..., `Q` is 9. The associated base is
/// `BASES[self.index() + 1]`.
pub fn index(self) -> usize {
self as usize
}
Comment on lines +47 to +52
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels error-prone, though I don't currently see a better solution :|

}

impl TryFrom<&char> for RawSuffix {
type Error = String;

Expand All @@ -70,25 +76,20 @@ pub type Suffix = (RawSuffix, WithI);

pub struct DisplayableSuffix(pub Suffix, pub Unit);

/// Upper-case characters for each [`RawSuffix`], indexed by [`RawSuffix::index`]
/// (`K` is 0, `M` is 1, ..., `Q` is 9).
///
/// The lower-case SI `k` is not part of this table; the `Display`
/// implementation for [`DisplayableSuffix`] special-cases it.
const SUFFIX_CHARS: [char; 10] = ['K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y', 'R', 'Q'];

impl fmt::Display for DisplayableSuffix {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let Self((ref raw_suffix, ref with_i), unit) = *self;
match (raw_suffix, unit) {
(RawSuffix::K, Unit::Si) => write!(f, "k"),
(RawSuffix::K, _) => write!(f, "K"),
(RawSuffix::M, _) => write!(f, "M"),
(RawSuffix::G, _) => write!(f, "G"),
(RawSuffix::T, _) => write!(f, "T"),
(RawSuffix::P, _) => write!(f, "P"),
(RawSuffix::E, _) => write!(f, "E"),
(RawSuffix::Z, _) => write!(f, "Z"),
(RawSuffix::Y, _) => write!(f, "Y"),
(RawSuffix::R, _) => write!(f, "R"),
(RawSuffix::Q, _) => write!(f, "Q"),
let Self((raw_suffix, with_i), unit) = *self;
let ch = match (raw_suffix, unit) {
(RawSuffix::K, Unit::Si) => 'k',
_ => SUFFIX_CHARS[raw_suffix.index()],
};
write!(f, "{ch}")?;
if with_i {
write!(f, "i")?;
}
.and_then(|()| match with_i {
true => write!(f, "i"),
false => Ok(()),
})
Ok(())
}
}
Loading
Loading