From 6db117dc763dbb054a28af33532d49396e582c9f Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sun, 20 Jul 2025 23:02:42 +0800 Subject: [PATCH 1/9] df: Move to use OsString This allows us to handle non-Unicode parameters. --- src/uu/df/src/df.rs | 3 ++- src/uu/df/src/filesystem.rs | 10 +++++----- src/uu/df/src/table.rs | 12 +++++++++--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index 24dccdae769..bf1964db3d3 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -431,7 +431,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let opt = Options::from(&matches).map_err(DfError::OptionsError)?; // Get the list of filesystems to display in the output table. - let filesystems: Vec = match matches.get_many::(OPT_PATHS) { + let filesystems: Vec = match matches.get_many::(OPT_PATHS) { None => { let filesystems = get_all_filesystems(&opt).map_err(|e| { let context = get_message("df-error-cannot-read-table-of-mounted-filesystems"); @@ -611,6 +611,7 @@ pub fn uu_app() -> Command { .arg( Arg::new(OPT_PATHS) .action(ArgAction::Append) + .value_parser(ValueParser::os_string()) .value_hint(clap::ValueHint::AnyPath), ) } diff --git a/src/uu/df/src/filesystem.rs b/src/uu/df/src/filesystem.rs index 53589ae8cf7..00905ab6103 100644 --- a/src/uu/df/src/filesystem.rs +++ b/src/uu/df/src/filesystem.rs @@ -8,7 +8,7 @@ //! filesystem mounted at a particular directory. It also includes //! information on amount of space available and amount of space used. // spell-checker:ignore canonicalized -use std::path::Path; +use std::{ffi::OsString, path::Path}; #[cfg(unix)] use uucore::fsext::statfs; @@ -28,7 +28,7 @@ pub(crate) struct Filesystem { /// When invoking `df` with a positional argument, it displays /// usage information for the filesystem that contains the given /// file. If given, this field contains that filename. - pub file: Option, + pub file: Option, /// Information about the mounted device, mount directory, and related options. pub mount_info: MountInfo, @@ -123,7 +123,7 @@ where impl Filesystem { // TODO: resolve uuid in `mount_info.dev_name` if exists - pub(crate) fn new(mount_info: MountInfo, file: Option) -> Option { + pub(crate) fn new(mount_info: MountInfo, file: Option) -> Option { let _stat_path = if mount_info.mount_dir.is_empty() { #[cfg(unix)] { @@ -154,7 +154,7 @@ impl Filesystem { pub(crate) fn from_mount( mounts: &[MountInfo], mount: &MountInfo, - file: Option, + file: Option, ) -> Result { if is_over_mounted(mounts, mount) { Err(FsError::OverMounted) @@ -189,7 +189,7 @@ impl Filesystem { where P: AsRef, { - let file = path.as_ref().display().to_string(); + let file = path.as_ref().as_os_str().to_owned(); let canonicalize = true; let result = mount_info_from_path(mounts, path, canonicalize); diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index e3814df2bd2..e3725c07b6c 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -16,6 +16,7 @@ use crate::{BlockSize, Options}; use uucore::fsext::{FsUsage, MountInfo}; use uucore::locale::get_message; +use std::ffi::OsString; use std::fmt; use std::ops::AddAssign; @@ -25,7 +26,7 @@ use std::ops::AddAssign; /// filesystem device, the mountpoint, the number of bytes used, etc. pub(crate) struct Row { /// The filename given on the command-line, if given. - file: Option, + file: Option, /// Name of the device on which the filesystem lives. fs_device: String, @@ -283,7 +284,12 @@ impl<'a> RowFormatter<'a> { Column::Iused => self.scaled_inodes(self.row.inodes_used), Column::Iavail => self.scaled_inodes(self.row.inodes_free), Column::Ipcent => Self::percentage(self.row.inodes_usage), - Column::File => self.row.file.as_ref().unwrap_or(&"-".into()).to_string(), + Column::File => self + .row + .file + .as_ref() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or("-".into()), Column::Fstype => self.row.fs_type.to_string(), #[cfg(target_os = "macos")] @@ -516,7 +522,7 @@ mod tests { impl Default for Row { fn default() -> Self { Self { - file: Some("/path/to/file".to_string()), + file: Some("/path/to/file".into()), fs_device: "my_device".to_string(), fs_type: "my_type".to_string(), fs_mount: "my_mount".to_string(), From eedd84f3017e7280ed895a1e1a6d4eb26c368495 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 22 Jul 2025 21:35:39 +0800 Subject: [PATCH 2/9] uucore: fsext: Change MountInfo's mount_root/dir to OsString This is necessary to fix handling of non-Unicode filepath in `df`. Note that other field might also need to be modified in the future. --- Cargo.lock | 1 + fuzz/Cargo.lock | 1 + src/uu/comm/src/comm.rs | 2 +- src/uu/df/src/df.rs | 12 ++-- src/uu/df/src/filesystem.rs | 18 +++--- src/uu/df/src/table.rs | 27 ++++----- src/uu/stat/src/stat.rs | 16 ++--- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features/fsext.rs | 88 ++++++++++++++++------------ 9 files changed, 90 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a441ee4df83..6a46111d88e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3977,6 +3977,7 @@ dependencies = [ "bigdecimal", "blake2b_simd", "blake3", + "bstr", "chrono", "clap", "crc32fast", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index cd90e4e67d7..21ed1dbee70 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1584,6 +1584,7 @@ dependencies = [ "bigdecimal", "blake2b_simd", "blake3", + "bstr", "clap", "crc32fast", "data-encoding", diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index a0e10b747b3..2d35d4d8b0e 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -101,7 +101,7 @@ impl OrderChecker { return true; } - let is_ordered = current_line >= &self.last_line; + let is_ordered = *current_line >= *self.last_line; if !is_ordered && !self.has_error { eprintln!( "{}", diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index bf1964db3d3..f0a8c40e897 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -630,9 +630,9 @@ mod tests { dev_id: String::new(), dev_name: String::from(dev_name), fs_type: String::new(), - mount_dir: String::from(mount_dir), + mount_dir: mount_dir.into(), mount_option: String::new(), - mount_root: String::from(mount_root), + mount_root: mount_root.into(), remote: false, dummy: false, } @@ -680,9 +680,9 @@ mod tests { dev_id: String::from(dev_id), dev_name: String::new(), fs_type: String::new(), - mount_dir: String::from(mount_dir), + mount_dir: mount_dir.into(), mount_option: String::new(), - mount_root: String::new(), + mount_root: "/".into(), remote: false, dummy: false, } @@ -725,9 +725,9 @@ mod tests { dev_id: String::new(), dev_name: String::new(), fs_type: String::from(fs_type), - mount_dir: String::from(mount_dir), + mount_dir: mount_dir.into(), mount_option: String::new(), - mount_root: String::new(), + mount_root: "/".into(), remote, dummy, } diff --git a/src/uu/df/src/filesystem.rs b/src/uu/df/src/filesystem.rs index 00905ab6103..69e6b6dff3d 100644 --- a/src/uu/df/src/filesystem.rs +++ b/src/uu/df/src/filesystem.rs @@ -127,18 +127,18 @@ impl Filesystem { let _stat_path = if mount_info.mount_dir.is_empty() { #[cfg(unix)] { - mount_info.dev_name.clone() + mount_info.dev_name.clone().into() } #[cfg(windows)] { // On windows, we expect the volume id - mount_info.dev_id.clone() + mount_info.dev_id.clone().into() } } else { mount_info.mount_dir.clone() }; #[cfg(unix)] - let usage = FsUsage::new(statfs(_stat_path).ok()?); + let usage = FsUsage::new(statfs(&_stat_path).ok()?); #[cfg(windows)] let usage = FsUsage::new(Path::new(&_stat_path)).ok()?; Some(Self { @@ -205,6 +205,8 @@ mod tests { mod mount_info_from_path { + use std::ffi::OsString; + use uucore::fsext::MountInfo; use crate::filesystem::{FsError, mount_info_from_path}; @@ -215,9 +217,9 @@ mod tests { dev_id: String::default(), dev_name: String::default(), fs_type: String::default(), - mount_dir: String::from(mount_dir), + mount_dir: OsString::from(mount_dir), mount_option: String::default(), - mount_root: String::default(), + mount_root: OsString::default(), remote: Default::default(), dummy: Default::default(), } @@ -312,6 +314,8 @@ mod tests { #[cfg(not(windows))] mod over_mount { + use std::ffi::OsString; + use crate::filesystem::{Filesystem, FsError, is_over_mounted}; use uucore::fsext::MountInfo; @@ -320,9 +324,9 @@ mod tests { dev_id: String::default(), dev_name: dev_name.map(String::from).unwrap_or_default(), fs_type: String::default(), - mount_dir: String::from(mount_dir), + mount_dir: OsString::from(mount_dir), mount_option: String::default(), - mount_root: String::default(), + mount_root: OsString::default(), remote: Default::default(), dummy: Default::default(), } diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index e3725c07b6c..baa22ea712b 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -35,7 +35,7 @@ pub(crate) struct Row { fs_type: String, /// Path at which the filesystem is mounted. - fs_mount: String, + fs_mount: OsString, /// Total number of bytes in the filesystem regardless of whether they are used. bytes: u64, @@ -277,7 +277,7 @@ impl<'a> RowFormatter<'a> { if self.is_total_row && !self.options.columns.contains(&Column::Source) { get_message("df-total") } else { - self.row.fs_mount.to_string() + self.row.fs_mount.to_string_lossy().into_owned() } } Column::Itotal => self.scaled_inodes(self.row.inodes), @@ -288,8 +288,7 @@ impl<'a> RowFormatter<'a> { .row .file .as_ref() - .map(|s| s.to_string_lossy().into_owned()) - .unwrap_or("-".into()), + .map_or("-".into(), |s| s.to_string_lossy().into_owned()), Column::Fstype => self.row.fs_type.to_string(), #[cfg(target_os = "macos")] @@ -525,7 +524,7 @@ mod tests { file: Some("/path/to/file".into()), fs_device: "my_device".to_string(), fs_type: "my_type".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), bytes: 100, bytes_used: 25, @@ -684,7 +683,7 @@ mod tests { }; let row = Row { fs_device: "my_device".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), bytes: 100, bytes_used: 25, @@ -711,7 +710,7 @@ mod tests { let row = Row { fs_device: "my_device".to_string(), fs_type: "my_type".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), bytes: 100, bytes_used: 25, @@ -737,7 +736,7 @@ mod tests { }; let row = Row { fs_device: "my_device".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), inodes: 10, inodes_used: 2, @@ -781,7 +780,7 @@ mod tests { let row = Row { fs_device: "my_device".to_string(), fs_type: "my_type".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), bytes: 4000, bytes_used: 1000, @@ -808,7 +807,7 @@ mod tests { let row = Row { fs_device: "my_device".to_string(), fs_type: "my_type".to_string(), - fs_mount: "my_mount".to_string(), + fs_mount: "my_mount".into(), bytes: 4096, bytes_used: 1024, @@ -874,9 +873,9 @@ mod tests { dev_id: "28".to_string(), dev_name: "none".to_string(), fs_type: "9p".to_string(), - mount_dir: "/usr/lib/wsl/drivers".to_string(), + mount_dir: "/usr/lib/wsl/drivers".into(), mount_option: "ro,nosuid,nodev,noatime".to_string(), - mount_root: "/".to_string(), + mount_root: "/".into(), remote: false, dummy: false, }, @@ -905,9 +904,9 @@ mod tests { dev_id: "28".to_string(), dev_name: "none".to_string(), fs_type: "9p".to_string(), - mount_dir: "/usr/lib/wsl/drivers".to_string(), + mount_dir: "/usr/lib/wsl/drivers".into(), mount_option: "ro,nosuid,nodev,noatime".to_string(), - mount_root: "/".to_string(), + mount_root: "/".into(), remote: false, dummy: false, }, diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index c33eb354f06..b9aebbbe7c4 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -22,7 +22,6 @@ use std::ffi::{OsStr, OsString}; use std::fs::{FileType, Metadata}; use std::io::Write; use std::os::unix::fs::{FileTypeExt, MetadataExt}; -use std::os::unix::prelude::OsStrExt; use std::path::Path; use std::{env, fs}; @@ -258,7 +257,7 @@ struct Stater { show_fs: bool, from_user: bool, files: Vec, - mount_list: Option>, + mount_list: Option>, default_tokens: Vec, default_dev_tokens: Vec, } @@ -876,7 +875,7 @@ impl Stater { })? .iter() .map(|mi| mi.mount_dir.clone()) - .collect::>(); + .collect::>(); // Reverse sort. The longer comes first. mount_list.sort(); mount_list.reverse(); @@ -899,7 +898,8 @@ impl Stater { for root in self.mount_list.as_ref()? { if path.starts_with(root) { - return Some(root.clone()); + // TODO: This is probably wrong, we should pass the OsString + return Some(root.to_string_lossy().into_owned()); } } None @@ -992,7 +992,7 @@ impl Stater { 'h' => OutputType::Unsigned(meta.nlink()), // inode number 'i' => OutputType::Unsigned(meta.ino()), - // mount point + // mount point: TODO: This should be an OsStr 'm' => OutputType::Str(self.find_mount_point(file).unwrap()), // file name 'n' => OutputType::Str(display_name.to_string()), @@ -1092,11 +1092,7 @@ impl Stater { OsString::from(file) }; if self.show_fs { - #[cfg(unix)] - let p = file.as_bytes(); - #[cfg(not(unix))] - let p = file.into_string().unwrap(); - match statfs(p) { + match statfs(&file) { Ok(meta) => { let tokens = &self.default_tokens; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 9a37f22faf8..04175299f68 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -19,6 +19,7 @@ all-features = true path = "src/lib/lib.rs" [dependencies] +bstr = { workspace = true } chrono = { workspace = true, optional = true } clap = { workspace = true } uucore_procs = { workspace = true } diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 7ae6281d4c9..e1bea168997 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -28,8 +28,9 @@ static EXIT_ERR: i32 = 1; #[cfg(windows)] use crate::show_warning; -#[cfg(windows)] use std::ffi::OsStr; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; #[cfg(windows)] use std::os::windows::ffi::OsStrExt; #[cfg(windows)] @@ -61,17 +62,15 @@ fn to_nul_terminated_wide_string(s: impl AsRef) -> Vec { use libc::{ S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, mode_t, strerror, }; -use std::borrow::Cow; #[cfg(unix)] -use std::ffi::CStr; -#[cfg(unix)] -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::io::Error as IOError; #[cfg(unix)] use std::mem; #[cfg(windows)] use std::path::Path; use std::time::UNIX_EPOCH; +use std::{borrow::Cow, ffi::OsString}; #[cfg(any( target_os = "linux", @@ -123,14 +122,16 @@ impl BirthTime for Metadata { } } +// TODO: Types for this struct are probably mostly wrong. Possibly, most of them +// should be OsString. #[derive(Debug, Clone)] pub struct MountInfo { /// Stores `volume_name` in windows platform and `dev_id` in unix platform pub dev_id: String, pub dev_name: String, pub fs_type: String, - pub mount_root: String, - pub mount_dir: String, + pub mount_root: OsString, + pub mount_dir: OsString, /// We only care whether this field contains "bind" pub mount_option: String, pub remote: bool, @@ -138,7 +139,9 @@ pub struct MountInfo { } #[cfg(any(target_os = "linux", target_os = "android"))] -fn replace_special_chars(s: String) -> String { +fn replace_special_chars(s: &[u8]) -> Vec { + use bstr::ByteSlice; + // Replace // // * ASCII space with a regular space character, @@ -152,7 +155,11 @@ fn replace_special_chars(s: String) -> String { impl MountInfo { #[cfg(any(target_os = "linux", target_os = "android"))] - fn new(file_name: &str, raw: &[&str]) -> Option { + fn new(file_name: &str, raw: &[&[u8]]) -> Option { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + use std::os::unix::ffi::OsStringExt; + let dev_name; let fs_type; let mount_root; @@ -165,21 +172,24 @@ impl MountInfo { // "man proc" for more details LINUX_MOUNTINFO => { const FIELDS_OFFSET: usize = 6; - let after_fields = raw[FIELDS_OFFSET..].iter().position(|c| *c == "-").unwrap() + let after_fields = raw[FIELDS_OFFSET..] + .iter() + .position(|c| *c == b"-") + .unwrap() + FIELDS_OFFSET + 1; - dev_name = raw[after_fields + 1].to_string(); - fs_type = raw[after_fields].to_string(); - mount_root = raw[3].to_string(); - mount_dir = replace_special_chars(raw[4].to_string()); - mount_option = raw[5].to_string(); + dev_name = String::from_utf8_lossy(raw[after_fields + 1]).to_string(); + fs_type = String::from_utf8_lossy(raw[after_fields]).to_string(); + mount_root = OsStr::from_bytes(raw[3]).to_owned(); + mount_dir = OsString::from_vec(replace_special_chars(raw[4])); + mount_option = String::from_utf8_lossy(raw[5]).to_string(); } LINUX_MTAB => { - dev_name = raw[0].to_string(); - fs_type = raw[2].to_string(); - mount_root = String::new(); - mount_dir = replace_special_chars(raw[1].to_string()); - mount_option = raw[3].to_string(); + dev_name = String::from_utf8_lossy(raw[0]).to_string(); + fs_type = String::from_utf8_lossy(raw[2]).to_string(); + mount_root = OsString::new(); + mount_dir = OsString::from_vec(replace_special_chars(raw[1])); + mount_option = String::from_utf8_lossy(raw[3]).to_string(); } _ => return None, }; @@ -343,7 +353,7 @@ fn is_remote_filesystem(dev_name: &str, fs_type: &str) -> bool { } #[cfg(all(unix, not(any(target_os = "aix", target_os = "redox"))))] -fn mount_dev_id(mount_dir: &str) -> String { +fn mount_dev_id(mount_dir: &OsStr) -> String { use std::os::unix::fs::MetadataExt; if let Ok(stat) = std::fs::metadata(mount_dir) { @@ -426,10 +436,10 @@ pub fn read_fs_list() -> UResult> { .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f)))?; let reader = BufReader::new(f); Ok(reader - .lines() + .split(b'\n') .map_while(Result::ok) .filter_map(|line| { - let raw_data = line.split_whitespace().collect::>(); + let raw_data = line.split(|c| *c == b' ').collect::>(); MountInfo::new(file_name, &raw_data) }) .collect::>()) @@ -855,11 +865,13 @@ impl FsMeta for StatFs { } #[cfg(unix)] -pub fn statfs

(path: P) -> Result -where - P: Into>, -{ - match CString::new(path) { +pub fn statfs(path: &OsStr) -> Result { + #[cfg(unix)] + let p = path.as_bytes(); + #[cfg(not(unix))] + let p = path.into_string().unwrap(); + + match CString::new(p) { Ok(p) => { let mut buffer: StatFs = unsafe { mem::zeroed() }; unsafe { @@ -1060,8 +1072,8 @@ mod tests { // spell-checker:ignore (word) relatime let info = MountInfo::new( LINUX_MOUNTINFO, - &"106 109 253:6 / /mnt rw,relatime - xfs /dev/fs0 rw" - .split_ascii_whitespace() + &b"106 109 253:6 / /mnt rw,relatime - xfs /dev/fs0 rw" + .split(|c| *c == b' ') .collect::>(), ) .unwrap(); @@ -1075,8 +1087,8 @@ mod tests { // Test parsing with different amounts of optional fields. let info = MountInfo::new( LINUX_MOUNTINFO, - &"106 109 253:6 / /mnt rw,relatime master:1 - xfs /dev/fs0 rw" - .split_ascii_whitespace() + &b"106 109 253:6 / /mnt rw,relatime master:1 - xfs /dev/fs0 rw" + .split(|c| *c == b' ') .collect::>(), ) .unwrap(); @@ -1086,8 +1098,8 @@ mod tests { let info = MountInfo::new( LINUX_MOUNTINFO, - &"106 109 253:6 / /mnt rw,relatime master:1 shared:2 - xfs /dev/fs0 rw" - .split_ascii_whitespace() + &b"106 109 253:6 / /mnt rw,relatime master:1 shared:2 - xfs /dev/fs0 rw" + .split(|c| *c == b' ') .collect::>(), ) .unwrap(); @@ -1101,8 +1113,8 @@ mod tests { fn test_mountinfo_dir_special_chars() { let info = MountInfo::new( LINUX_MOUNTINFO, - &r#"317 61 7:0 / /mnt/f\134\040\011oo rw,relatime shared:641 - ext4 /dev/loop0 rw"# - .split_ascii_whitespace() + &br#"317 61 7:0 / /mnt/f\134\040\011oo rw,relatime shared:641 - ext4 /dev/loop0 rw"# + .split(|c| *c == b' ') .collect::>(), ) .unwrap(); @@ -1111,8 +1123,8 @@ mod tests { let info = MountInfo::new( LINUX_MTAB, - &r#"/dev/loop0 /mnt/f\134\040\011oo ext4 rw,relatime 0 0"# - .split_ascii_whitespace() + &br#"/dev/loop0 /mnt/f\134\040\011oo ext4 rw,relatime 0 0"# + .split(|c| *c == b' ') .collect::>(), ) .unwrap(); From adffd30f13aa0c2b391865c02b901495e5c989bc Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 23 Jul 2025 13:09:51 +0800 Subject: [PATCH 3/9] fsext: Add parsing test for non-unicode mount point --- src/uucore/src/lib/features/fsext.rs | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index e1bea168997..345c1bff17e 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -1131,4 +1131,35 @@ mod tests { assert_eq!(info.mount_dir, r#"/mnt/f\ oo"#); } + + #[test] + #[cfg(any(target_os = "linux", target_os = "android"))] + fn test_mountinfo_dir_non_unicode() { + let info = MountInfo::new( + LINUX_MOUNTINFO, + &b"317 61 7:0 / /mnt/some-\xc0-dir-\xf3 rw,relatime shared:641 - ext4 /dev/loop0 rw" + .split(|c| *c == b' ') + .collect::>(), + ) + .unwrap(); + + assert_eq!( + info.mount_dir, + crate::os_str_from_bytes(b"/mnt/some-\xc0-dir-\xf3").unwrap() + ); + + let info = MountInfo::new( + LINUX_MOUNTINFO, + &b"317 61 7:0 / /mnt/some-\\040-dir-\xf3 rw,relatime shared:641 - ext4 /dev/loop0 rw" + .split(|c| *c == b' ') + .collect::>(), + ) + .unwrap(); + + // Note that the \040 above will have been substituted by a space. + assert_eq!( + info.mount_dir, + crate::os_str_from_bytes(b"/mnt/some- -dir-\xf3").unwrap() + ); + } } From 418bf93bf650236e449dbfd8b34bc5e0c206b8de Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 22 Jul 2025 22:09:06 +0800 Subject: [PATCH 4/9] uucore: fsext: Fix Windows/Mac Windows handling can be done more properly, but I don't have a machine to test this. --- src/uu/df/src/filesystem.rs | 2 +- src/uucore/src/lib/features/fsext.rs | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/uu/df/src/filesystem.rs b/src/uu/df/src/filesystem.rs index 69e6b6dff3d..28e6bb6c03a 100644 --- a/src/uu/df/src/filesystem.rs +++ b/src/uu/df/src/filesystem.rs @@ -165,7 +165,7 @@ impl Filesystem { /// Find and create the filesystem from the given mount. #[cfg(windows)] - pub(crate) fn from_mount(mount: &MountInfo, file: Option) -> Result { + pub(crate) fn from_mount(mount: &MountInfo, file: Option) -> Result { Self::new(mount.clone(), file).ok_or(FsError::MountMissing) } diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 345c1bff17e..072aaeeb147 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -19,12 +19,12 @@ const MAX_PATH: usize = 266; static EXIT_ERR: i32 = 1; #[cfg(any( - windows, target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd" ))] +use crate::os_str_from_bytes; #[cfg(windows)] use crate::show_warning; @@ -243,6 +243,8 @@ impl MountInfo { // TODO: support the case when `GetLastError()` returns `ERROR_MORE_DATA` return None; } + // TODO: This should probably call `OsString::from_wide`, but unclear if + // terminating zeros need to be striped first. let mount_root = LPWSTR2String(&mount_root_buf); let mut fs_type_buf = [0u16; MAX_PATH]; @@ -273,8 +275,8 @@ impl MountInfo { dev_id: volume_name, dev_name, fs_type: fs_type.unwrap_or_default(), - mount_root, - mount_dir: String::new(), + mount_root: mount_root.into(), // TODO: We should figure out how to keep an OsString here. + mount_dir: OsString::new(), mount_option: String::new(), remote, dummy: false, @@ -302,12 +304,11 @@ impl From for MountInfo { .to_string_lossy() .into_owned() }; - let mount_dir = unsafe { + let mount_dir_bytes = unsafe { // spell-checker:disable-next-line - CStr::from_ptr(&statfs.f_mntonname[0]) - .to_string_lossy() - .into_owned() + CStr::from_ptr(&statfs.f_mntonname[0]).to_bytes() }; + let mount_dir = os_str_from_bytes(mount_dir_bytes).unwrap().into_owned(); let dev_id = mount_dev_id(&mount_dir); let dummy = is_dummy_filesystem(&fs_type, ""); @@ -318,7 +319,7 @@ impl From for MountInfo { dev_name, fs_type, mount_dir, - mount_root: String::new(), + mount_root: OsString::new(), mount_option: String::new(), remote, dummy, From 2b34f0814ba693370bef4a841bfce935a2d65ed9 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 23 Jul 2025 22:06:20 +0800 Subject: [PATCH 5/9] df: table: Collect byte slices for Cell element Also pre-record the cell width as getting it back in the printing function would require some conversion back to a String. --- src/uu/df/src/table.rs | 161 ++++++++++++++++++++++++++++------------- 1 file changed, 111 insertions(+), 50 deletions(-) diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index baa22ea712b..50eee4837fe 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -192,6 +192,43 @@ impl From for Row { } } +/// A `Cell` in the table. We store raw `bytes` as the data (e.g. directory name +/// may be non-Unicode). We also record the printed `width` for alignment purpose, +/// as it is easier to compute on the original string. +struct Cell { + bytes: Vec, + width: usize, +} + +impl Cell { + /// Create a cell, knowing that s contains only 1-length chars + fn from_ascii_string>(s: T) -> Cell { + let s = s.as_ref(); + Cell { + bytes: s.as_bytes().into(), + width: s.len(), + } + } + + /// Create a cell from an unknown origin string that may contain + /// wide characters. + fn from_string>(s: T) -> Cell { + let s = s.as_ref(); + Cell { + bytes: s.as_bytes().into(), + width: UnicodeWidthStr::width(s), + } + } + + /// Create a cell from an `OsString` + fn from_os_string(os: &OsString) -> Cell { + Cell { + bytes: uucore::os_str_as_bytes(os).unwrap().to_vec(), + width: UnicodeWidthStr::width(os.to_string_lossy().as_ref()), + } + } +} + /// A formatter for [`Row`]. /// /// The `options` control how the information in the row gets formatted. @@ -225,47 +262,50 @@ impl<'a> RowFormatter<'a> { /// Get a string giving the scaled version of the input number. /// /// The scaling factor is defined in the `options` field. - fn scaled_bytes(&self, size: u64) -> String { - if let Some(h) = self.options.human_readable { + fn scaled_bytes(&self, size: u64) -> Cell { + let s = if let Some(h) = self.options.human_readable { to_magnitude_and_suffix(size.into(), SuffixType::HumanReadable(h)) } else { let BlockSize::Bytes(d) = self.options.block_size; (size as f64 / d as f64).ceil().to_string() - } + }; + Cell::from_ascii_string(s) } /// Get a string giving the scaled version of the input number. /// /// The scaling factor is defined in the `options` field. - fn scaled_inodes(&self, size: u128) -> String { - if let Some(h) = self.options.human_readable { + fn scaled_inodes(&self, size: u128) -> Cell { + let s = if let Some(h) = self.options.human_readable { to_magnitude_and_suffix(size, SuffixType::HumanReadable(h)) } else { size.to_string() - } + }; + Cell::from_ascii_string(s) } /// Convert a float between 0 and 1 into a percentage string. /// /// If `None`, return the string `"-"` instead. - fn percentage(fraction: Option) -> String { - match fraction { + fn percentage(fraction: Option) -> Cell { + let s = match fraction { None => "-".to_string(), Some(x) => format!("{:.0}%", (100.0 * x).ceil()), - } + }; + Cell::from_ascii_string(s) } /// Returns formatted row data. - fn get_values(&self) -> Vec { - let mut strings = Vec::new(); + fn get_cells(&self) -> Vec { + let mut cells = Vec::new(); for column in &self.options.columns { - let string = match column { + let cell = match column { Column::Source => { if self.is_total_row { - get_message("df-total") + Cell::from_string(get_message("df-total")) } else { - self.row.fs_device.to_string() + Cell::from_string(&self.row.fs_device) } } Column::Size => self.scaled_bytes(self.row.bytes), @@ -275,9 +315,9 @@ impl<'a> RowFormatter<'a> { Column::Target => { if self.is_total_row && !self.options.columns.contains(&Column::Source) { - get_message("df-total") + Cell::from_string(get_message("df-total")) } else { - self.row.fs_mount.to_string_lossy().into_owned() + Cell::from_os_string(&self.row.fs_mount) } } Column::Itotal => self.scaled_inodes(self.row.inodes), @@ -288,17 +328,17 @@ impl<'a> RowFormatter<'a> { .row .file .as_ref() - .map_or("-".into(), |s| s.to_string_lossy().into_owned()), + .map_or(Cell::from_ascii_string("-"), Cell::from_os_string), - Column::Fstype => self.row.fs_type.to_string(), + Column::Fstype => Cell::from_string(&self.row.fs_type), #[cfg(target_os = "macos")] Column::Capacity => Self::percentage(self.row.bytes_capacity), }; - strings.push(string); + cells.push(cell); } - strings + cells } } @@ -375,7 +415,7 @@ impl Header { /// The output table. pub(crate) struct Table { alignments: Vec, - rows: Vec>, + rows: Vec>, widths: Vec, } @@ -389,7 +429,7 @@ impl Table { .map(|(i, col)| Column::min_width(col).max(headers[i].len())) .collect(); - let mut rows = vec![headers]; + let mut rows = vec![headers.iter().map(Cell::from_string).collect()]; // The running total of filesystem sizes and usage. // @@ -404,7 +444,7 @@ impl Table { if options.show_all_fs || filesystem.usage.blocks > 0 { let row = Row::from(filesystem); let fmt = RowFormatter::new(&row, options, false); - let values = fmt.get_values(); + let values = fmt.get_cells(); total += row; rows.push(values); @@ -413,15 +453,15 @@ impl Table { if options.show_total { let total_row = RowFormatter::new(&total, options, true); - rows.push(total_row.get_values()); + rows.push(total_row.get_cells()); } // extend the column widths (in chars) for long values in rows // do it here, after total row was added to the list of rows for row in &rows { for (i, value) in row.iter().enumerate() { - if UnicodeWidthStr::width(value.as_str()) > widths[i] { - widths[i] = UnicodeWidthStr::width(value.as_str()); + if value.width > widths[i] { + widths[i] = value.width; } } } @@ -450,6 +490,8 @@ impl fmt::Display for Table { while let Some(row) = row_iter.next() { let mut col_iter = row.iter().enumerate().peekable(); while let Some((i, elem)) = col_iter.next() { + // TODO: Fix this, and print the bytes directly. + let elem = String::from_utf8(elem.bytes.clone()).unwrap_or("meh?".to_string()); let is_last_col = col_iter.peek().is_none(); match self.alignments.get(i) { @@ -490,7 +532,7 @@ mod tests { use crate::blocks::HumanReadable; use crate::columns::Column; - use crate::table::{Header, HeaderMode, Row, RowFormatter, Table}; + use crate::table::{Cell, Header, HeaderMode, Row, RowFormatter, Table}; use crate::{BlockSize, Options}; fn init() { @@ -674,6 +716,13 @@ mod tests { ); } + fn compare_cell_content(cells: Vec, expected: Vec<&str>) -> bool { + cells + .into_iter() + .zip(expected) + .all(|(c, s)| c.bytes == s.as_bytes()) + } + #[test] fn test_row_formatter() { init(); @@ -693,10 +742,10 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!( - fmt.get_values(), + assert!(compare_cell_content( + fmt.get_cells(), vec!("my_device", "100", "25", "75", "25%", "my_mount") - ); + )); } #[test] @@ -720,10 +769,10 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!( - fmt.get_values(), + assert!(compare_cell_content( + fmt.get_cells(), vec!("my_device", "my_type", "100", "25", "75", "25%", "my_mount") - ); + )); } #[test] @@ -746,10 +795,10 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!( - fmt.get_values(), + assert!(compare_cell_content( + fmt.get_cells(), vec!("my_device", "10", "2", "8", "20%", "my_mount") - ); + )); } #[test] @@ -766,7 +815,7 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!(fmt.get_values(), vec!("1", "10")); + assert!(compare_cell_content(fmt.get_cells(), vec!("1", "10"))); } #[test] @@ -790,10 +839,10 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!( - fmt.get_values(), + assert!(compare_cell_content( + fmt.get_cells(), vec!("my_device", "my_type", "4k", "1k", "3k", "25%", "my_mount") - ); + )); } #[test] @@ -817,10 +866,10 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!( - fmt.get_values(), + assert!(compare_cell_content( + fmt.get_cells(), vec!("my_device", "my_type", "4K", "1K", "3K", "25%", "my_mount") - ); + )); } #[test] @@ -835,13 +884,13 @@ mod tests { ..Default::default() }; let fmt = RowFormatter::new(&row, &options, false); - assert_eq!(fmt.get_values(), vec!("26%")); + assert!(compare_cell_content(fmt.get_cells(), vec!("26%"))); } #[test] fn test_row_formatter_with_round_up_byte_values() { init(); - fn get_formatted_values(bytes: u64, bytes_used: u64, bytes_avail: u64) -> Vec { + fn get_formatted_values(bytes: u64, bytes_used: u64, bytes_avail: u64) -> Vec { let options = Options { block_size: BlockSize::Bytes(1000), columns: vec![Column::Size, Column::Used, Column::Avail], @@ -854,13 +903,25 @@ mod tests { bytes_avail, ..Default::default() }; - RowFormatter::new(&row, &options, false).get_values() + RowFormatter::new(&row, &options, false).get_cells() } - assert_eq!(get_formatted_values(100, 100, 0), vec!("1", "1", "0")); - assert_eq!(get_formatted_values(100, 99, 1), vec!("1", "1", "1")); - assert_eq!(get_formatted_values(1000, 1000, 0), vec!("1", "1", "0")); - assert_eq!(get_formatted_values(1001, 1000, 1), vec!("2", "1", "1")); + assert!(compare_cell_content( + get_formatted_values(100, 100, 0), + vec!("1", "1", "0") + )); + assert!(compare_cell_content( + get_formatted_values(100, 99, 1), + vec!("1", "1", "1") + )); + assert!(compare_cell_content( + get_formatted_values(1000, 1000, 0), + vec!("1", "1", "0") + )); + assert!(compare_cell_content( + get_formatted_values(1001, 1000, 1), + vec!("2", "1", "1") + )); } #[test] From 439cf79211b4d6c984af740e7e1b97419691aca1 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 24 Jul 2025 09:01:58 +0800 Subject: [PATCH 6/9] df: table: Move from fmt to a non-standard write_to Will allow us to write u8 slices directly. Also move the last \n print to the function, simplifying the loop. --- src/uu/df/src/df.rs | 3 ++- src/uu/df/src/table.rs | 36 +++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index f0a8c40e897..6e51c98d0e1 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -20,6 +20,7 @@ use uucore::{format_usage, show}; use clap::{Arg, ArgAction, ArgMatches, Command, parser::ValueSource}; use std::ffi::OsString; +use std::io::stdout; use std::path::Path; use thiserror::Error; @@ -464,7 +465,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } }; - println!("{}", Table::new(&opt, filesystems)); + Table::new(&opt, filesystems).write_to(&mut stdout())?; Ok(()) } diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index 50eee4837fe..90a03a9c939 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -17,7 +17,6 @@ use uucore::fsext::{FsUsage, MountInfo}; use uucore::locale::get_message; use std::ffi::OsString; -use std::fmt; use std::ops::AddAssign; /// A row in the filesystem usage data table. @@ -482,12 +481,9 @@ impl Table { alignments } -} -impl fmt::Display for Table { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut row_iter = self.rows.iter().peekable(); - while let Some(row) = row_iter.next() { + pub(crate) fn write_to(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { + for row in &self.rows { let mut col_iter = row.iter().enumerate().peekable(); while let Some((i, elem)) = col_iter.next() { // TODO: Fix this, and print the bytes directly. @@ -498,26 +494,24 @@ impl fmt::Display for Table { Some(Alignment::Left) => { if is_last_col { // no trailing spaces in last column - write!(f, "{elem}")?; + write!(writer, "{elem}")?; } else { - write!(f, "{elem: { - write!(f, "{elem:>width$}", width = self.widths[i])?; + write!(writer, "{elem:>width$}", width = self.widths[i])?; } None => break, } if !is_last_col { // column separator - write!(f, " ")?; + write!(writer, " ")?; } } - if row_iter.peek().is_some() { - writeln!(f)?; - } + writeln!(writer)?; } Ok(()) @@ -996,22 +990,30 @@ mod tests { }; let table_w_total = Table::new(&options, filesystems.clone()); + let mut data_w_total: Vec = vec![]; + table_w_total + .write_to(&mut data_w_total) + .expect("Write error."); assert_eq!( - table_w_total.to_string(), + String::from_utf8_lossy(&data_w_total), "Filesystem Inodes IUsed IFree\n\ none 99999999999 99999000000 999999\n\ none 99999999999 99999000000 999999\n\ - total 199999999998 199998000000 1999998" + total 199999999998 199998000000 1999998\n" ); options.show_total = false; let table_w_o_total = Table::new(&options, filesystems); + let mut data_w_o_total: Vec = vec![]; + table_w_o_total + .write_to(&mut data_w_o_total) + .expect("Write error."); assert_eq!( - table_w_o_total.to_string(), + String::from_utf8_lossy(&data_w_o_total), "Filesystem Inodes IUsed IFree\n\ none 99999999999 99999000000 999999\n\ - none 99999999999 99999000000 999999" + none 99999999999 99999000000 999999\n" ); } From 6cc3eda8e409c1c4d47cca01cadda7ee218f98f6 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 24 Jul 2025 09:15:22 +0800 Subject: [PATCH 7/9] df: table: Print raw bytes Final step, let's just print the raw bytes now. --- src/uu/df/src/table.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index 90a03a9c939..8d8d5af3d79 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -17,6 +17,7 @@ use uucore::fsext::{FsUsage, MountInfo}; use uucore::locale::get_message; use std::ffi::OsString; +use std::iter; use std::ops::AddAssign; /// A row in the filesystem usage data table. @@ -486,28 +487,28 @@ impl Table { for row in &self.rows { let mut col_iter = row.iter().enumerate().peekable(); while let Some((i, elem)) = col_iter.next() { - // TODO: Fix this, and print the bytes directly. - let elem = String::from_utf8(elem.bytes.clone()).unwrap_or("meh?".to_string()); let is_last_col = col_iter.peek().is_none(); + let pad_width = self.widths[i].saturating_sub(elem.width); match self.alignments.get(i) { Some(Alignment::Left) => { - if is_last_col { - // no trailing spaces in last column - write!(writer, "{elem}")?; - } else { - write!(writer, "{elem:>())?; } } Some(Alignment::Right) => { - write!(writer, "{elem:>width$}", width = self.widths[i])?; + writer.write_all(&iter::repeat_n(b' ', pad_width).collect::>())?; + writer.write_all(&elem.bytes)?; } None => break, } if !is_last_col { // column separator - write!(writer, " ")?; + writer.write_all(b" ")?; } } From 6884e29f976583a9835a63228ff85d8f7757f41e Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 24 Jul 2025 09:24:41 +0800 Subject: [PATCH 8/9] test_df: Use lossy stdout string The tests fail when a non-UTF8 path is mounted, that's... not a common case, but using a lossy string works just as well for the tests, so let's use that. --- tests/by-util/test_df.rs | 78 +++++++++++++++++------------------ tests/uutests/src/lib/util.rs | 5 +++ 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/tests/by-util/test_df.rs b/tests/by-util/test_df.rs index 35738d837c8..ecb60bc5fea 100644 --- a/tests/by-util/test_df.rs +++ b/tests/by-util/test_df.rs @@ -117,7 +117,7 @@ fn test_df_output() { .arg("-H") .arg("--total") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let actual = output.lines().take(1).collect::>()[0]; let actual = actual.split_whitespace().collect::>(); assert_eq!(actual, expected); @@ -151,7 +151,7 @@ fn test_df_output_overridden() { .arg("-hH") .arg("--total") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let actual = output.lines().take(1).collect::>()[0]; let actual = actual.split_whitespace().collect::>(); assert_eq!(actual, expected); @@ -181,7 +181,7 @@ fn test_default_headers() { "on", ] }; - let output = new_ucmd!().succeeds().stdout_move_str(); + let output = new_ucmd!().succeeds().stdout_str_lossy(); let actual = output.lines().take(1).collect::>()[0]; let actual = actual.split_whitespace().collect::>(); assert_eq!(actual, expected); @@ -195,7 +195,7 @@ fn test_precedence_of_human_readable_and_si_header_over_output_header() { let output = new_ucmd!() .args(&[arg, "--output=size"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap(); assert_eq!(header, " Size"); } @@ -207,7 +207,7 @@ fn test_used_header_starts_with_space() { // using -h here to ensure the width of the column's content is <= 4 .args(&["-h", "--output=used"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap(); assert_eq!(header, " Used"); } @@ -226,11 +226,11 @@ fn test_order_same() { let output1 = new_ucmd!() .arg("--output=source") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let output2 = new_ucmd!() .arg("--output=source") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!(output1, output2); } @@ -238,7 +238,7 @@ fn test_order_same() { #[cfg(all(unix, not(target_os = "freebsd")))] // FIXME: fix this test for FreeBSD #[test] fn test_output_mp_repeat() { - let output1 = new_ucmd!().arg("/").arg("/").succeeds().stdout_move_str(); + let output1 = new_ucmd!().arg("/").arg("/").succeeds().stdout_str_lossy(); let output1: Vec = output1 .lines() .map(|l| String::from(l.split_once(' ').unwrap().0)) @@ -272,7 +272,7 @@ fn test_type_option() { let fs_types = new_ucmd!() .arg("--output=fstype") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let fs_type = fs_types.lines().nth(1).unwrap().trim(); new_ucmd!().args(&["-t", fs_type]).succeeds(); @@ -292,7 +292,7 @@ fn test_type_option_with_file() { let fs_type = new_ucmd!() .args(&["--output=fstype", "."]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let fs_type = fs_type.lines().nth(1).unwrap().trim(); new_ucmd!().args(&["-t", fs_type, "."]).succeeds(); @@ -310,7 +310,7 @@ fn test_type_option_with_file() { let fs_types = new_ucmd!() .arg("--output=fstype") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let fs_types: Vec<_> = fs_types .lines() .skip(1) @@ -335,7 +335,7 @@ fn test_exclude_all_types() { let fs_types = new_ucmd!() .arg("--output=fstype") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let fs_types: HashSet<_> = fs_types.lines().skip(1).collect(); let mut args = Vec::new(); @@ -379,7 +379,7 @@ fn test_total() { // ... // /dev/loop14 63488 63488 0 100% /snap/core20/1361 // total 258775268 98099712 148220200 40% - - let output = new_ucmd!().arg("--total").succeeds().stdout_move_str(); + let output = new_ucmd!().arg("--total").succeeds().stdout_str_lossy(); // Skip the header line. let lines: Vec<&str> = output.lines().skip(1).collect(); @@ -422,21 +422,21 @@ fn test_total_label_in_correct_column() { let output = new_ucmd!() .args(&["--output=source", "--total", "."]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let last_line = output.lines().last().unwrap(); assert_eq!(last_line.trim(), "total"); let output = new_ucmd!() .args(&["--output=target", "--total", "."]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let last_line = output.lines().last().unwrap(); assert_eq!(last_line.trim(), "total"); let output = new_ucmd!() .args(&["--output=source,target", "--total", "."]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let last_line = output.lines().last().unwrap(); assert_eq!( last_line.split_whitespace().collect::>(), @@ -446,7 +446,7 @@ fn test_total_label_in_correct_column() { let output = new_ucmd!() .args(&["--output=target,source", "--total", "."]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let last_line = output.lines().last().unwrap(); assert_eq!( last_line.split_whitespace().collect::>(), @@ -463,7 +463,7 @@ fn test_use_percentage() { // "percentage" values. .args(&["--total", "--output=used,avail,pcent", "--block-size=1"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); // Skip the header line. let lines: Vec<&str> = output.lines().skip(1).collect(); @@ -488,7 +488,7 @@ fn test_iuse_percentage() { let output = new_ucmd!() .args(&["--total", "--output=itotal,iused,ipcent"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); // Skip the header line. let lines: Vec<&str> = output.lines().skip(1).collect(); @@ -518,7 +518,7 @@ fn test_default_block_size() { let output = new_ucmd!() .arg("--output=size") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap().trim().to_string(); assert_eq!(header, "1K-blocks"); @@ -527,7 +527,7 @@ fn test_default_block_size() { .arg("--output=size") .env("POSIXLY_CORRECT", "1") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap().trim().to_string(); assert_eq!(header, "512B-blocks"); @@ -547,14 +547,14 @@ fn test_default_block_size_in_posix_portability_mode() { .to_string() } - let output = new_ucmd!().arg("-P").succeeds().stdout_move_str(); + let output = new_ucmd!().arg("-P").succeeds().stdout_str_lossy(); assert_eq!(get_header(&output), "1024-blocks"); let output = new_ucmd!() .arg("-P") .env("POSIXLY_CORRECT", "1") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!(get_header(&output), "512-blocks"); } @@ -564,7 +564,7 @@ fn test_block_size_1024() { let output = new_ucmd!() .args(&["-B", &format!("{block_size}"), "--output=size"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); output.lines().next().unwrap().trim().to_string() } @@ -588,7 +588,7 @@ fn test_block_size_with_suffix() { let output = new_ucmd!() .args(&["-B", block_size, "--output=size"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); output.lines().next().unwrap().trim().to_string() } @@ -612,7 +612,7 @@ fn test_block_size_in_posix_portability_mode() { let output = new_ucmd!() .args(&["-P", "-B", block_size]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); output .lines() .next() @@ -639,7 +639,7 @@ fn test_block_size_from_env() { .arg("--output=size") .env(env_var, env_value) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); output.lines().next().unwrap().trim().to_string() } @@ -658,7 +658,7 @@ fn test_block_size_from_env_precedences() { .env(k1, v1) .env(k2, v2) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); output.lines().next().unwrap().trim().to_string() } @@ -677,7 +677,7 @@ fn test_precedence_of_block_size_arg_over_env() { .args(&["-B", "999", "--output=size"]) .env("DF_BLOCK_SIZE", "111") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap().trim().to_string(); assert_eq!(header, "999B-blocks"); @@ -691,7 +691,7 @@ fn test_invalid_block_size_from_env() { .arg("--output=size") .env("DF_BLOCK_SIZE", "invalid") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap().trim().to_string(); assert_eq!(header, default_block_size_header); @@ -701,7 +701,7 @@ fn test_invalid_block_size_from_env() { .env("DF_BLOCK_SIZE", "invalid") .env("BLOCK_SIZE", "222") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output.lines().next().unwrap().trim().to_string(); assert_eq!(header, default_block_size_header); @@ -717,7 +717,7 @@ fn test_ignore_block_size_from_env_in_posix_portability_mode() { .env("BLOCK_SIZE", "222") .env("BLOCKSIZE", "333") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let header = output .lines() .next() @@ -784,13 +784,13 @@ fn test_output_selects_columns() { let output = new_ucmd!() .args(&["--output=source"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!(output.lines().next().unwrap(), "Filesystem"); let output = new_ucmd!() .args(&["--output=source,target"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!( output .lines() @@ -804,7 +804,7 @@ fn test_output_selects_columns() { let output = new_ucmd!() .args(&["--output=source,target,used"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!( output .lines() @@ -821,7 +821,7 @@ fn test_output_multiple_occurrences() { let output = new_ucmd!() .args(&["--output=source", "--output=target"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); assert_eq!( output .lines() @@ -840,7 +840,7 @@ fn test_output_file_all_filesystems() { let output = new_ucmd!() .arg("--output=file") .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let mut lines = output.lines(); assert_eq!(lines.next().unwrap(), "File"); for line in lines { @@ -862,7 +862,7 @@ fn test_output_file_specific_files() { let output = ucmd .args(&["--output=file", "a", "b", "c"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let actual: Vec<&str> = output.lines().collect(); assert_eq!(actual, vec!["File", "a", "b", "c"]); } @@ -876,7 +876,7 @@ fn test_file_column_width_if_filename_contains_unicode_chars() { let output = ucmd .args(&["--output=file,target", "äöü.txt"]) .succeeds() - .stdout_move_str(); + .stdout_str_lossy(); let actual = output.lines().next().unwrap(); // expected width: 7 chars (length of äöü.txt) + 1 char (column separator) assert_eq!(actual, "File Mounted on"); diff --git a/tests/uutests/src/lib/util.rs b/tests/uutests/src/lib/util.rs index 154a7aa18fa..1e71a3cf1a5 100644 --- a/tests/uutests/src/lib/util.rs +++ b/tests/uutests/src/lib/util.rs @@ -357,6 +357,11 @@ impl CmdResult { std::str::from_utf8(&self.stdout).unwrap() } + /// Returns the program's standard output as a string, automatically handling invalid utf8 + pub fn stdout_str_lossy(self) -> String { + String::from_utf8_lossy(&self.stdout).to_string() + } + /// Returns the program's standard output as a string /// consumes self pub fn stdout_move_str(self) -> String { From f9af783c55acea70d68b44fde48db6234662758d Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 24 Jul 2025 10:14:01 +0800 Subject: [PATCH 9/9] df: table: Add test for non-Unicode printing --- src/uu/df/src/table.rs | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index 8d8d5af3d79..e8b1c29e246 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -1018,6 +1018,56 @@ mod tests { ); } + #[cfg(unix)] + #[test] + fn test_table_column_width_non_unicode() { + init(); + let bad_unicode_os_str = uucore::os_str_from_bytes(b"/usr/lib/w\xf3l/drivers") + .expect("Only unix platforms can test non-unicode names") + .to_os_string(); + let d1 = crate::Filesystem { + file: None, + mount_info: crate::MountInfo { + dev_id: "28".to_string(), + dev_name: "none".to_string(), + fs_type: "9p".to_string(), + mount_dir: bad_unicode_os_str, + mount_option: "ro,nosuid,nodev,noatime".to_string(), + mount_root: "/".into(), + remote: false, + dummy: false, + }, + usage: crate::table::FsUsage { + blocksize: 4096, + blocks: 244_029_695, + bfree: 125_085_030, + bavail: 125_085_030, + bavail_top_bit_set: false, + files: 99_999_999_999, + ffree: 999_999, + }, + }; + + let filesystems = vec![d1]; + + let options = Options { + show_total: false, + columns: vec![Column::Source, Column::Target, Column::Itotal], + ..Default::default() + }; + + let table = Table::new(&options, filesystems.clone()); + let mut data: Vec = vec![]; + table.write_to(&mut data).expect("Write error."); + assert_eq!( + data, + b"Filesystem Mounted on Inodes\n\ + none /usr/lib/w\xf3l/drivers 99999999999\n", + "Comparison failed, lossy data for reference:\n{}\n", + String::from_utf8_lossy(&data) + ); + } + #[test] fn test_row_accumulation_u64_overflow() { init();