diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 14b41a427f1e0..c883f8c051eef 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -3308,9 +3308,146 @@ impl Path { fs::canonicalize(self) } - /// Normalize a path, including `..` without traversing the filesystem. + /// Does the path represent a child of the current location? Path components are evaluated + /// naively with no filesystem traversal, so a "bounded" path may still be able to escape the + /// working directory when applied to the filesystem if symlinks are present. /// - /// Returns an error if normalization would leave leading `..` components. + /// # Examples + /// + /// ``` + /// #![feature(normalize_lexically)] + /// use std::path::Path; + /// + /// assert!(Path::new("abc").is_lexically_bounded()); + /// assert!(Path::new("abc/../def").is_lexically_bounded()); + /// + /// assert!(!Path::new("").is_lexically_bounded()); + /// assert!(!Path::new(".").is_lexically_bounded()); + /// assert!(!Path::new("..").is_lexically_bounded()); + /// assert!(!Path::new("abc/../../def").is_lexically_bounded()); + /// assert!(!Path::new("abc/..").is_lexically_bounded()); + /// assert!(!Path::new("/abc").is_lexically_bounded()); + /// ``` + #[unstable(feature = "normalize_lexically", issue = "134694")] + pub fn is_lexically_bounded(&self) -> bool { + use Component::*; + + self.components() + .try_fold(0usize, |depth, component| match component { + Prefix(_) | RootDir => None, + CurDir => Some(depth), + Normal(_) => Some(depth + 1), + ParentDir => depth.checked_sub(1), + }) + .is_some_and(|i| i > 0) + } + + /// Is the path normalized, ie. expressed in simplest possible terms? A normalized path: + /// + /// * Starts with either + /// * a prefix followed by root (`C:\`); or + /// * a prefix (`C:`); or + /// * root (`/`); or + /// * `.`; or + /// * zero or more `..` + /// * Continues with zero or more normal segments (`abc`) + /// * Contains only the primary platform separator, if applicable (eg. only `\\` rather than + /// `/` on Windows) + /// * Contains no repeated separators unless as part of the prefix + /// * Does not end with a separator unless as part of the prefix + /// + /// # Examples + /// + /// TODO: examples for non-*nix platforms + /// + /// ``` + /// #![feature(normalize_lexically)] + /// use std::path::Path; + /// + /// assert!(Path::new("").is_normalized()); + /// assert!(Path::new(".").is_normalized()); + /// assert!(Path::new("abc").is_normalized()); + /// assert!(Path::new("./abc").is_normalized()); + /// assert!(Path::new("../../abc").is_normalized()); + /// assert!(Path::new("/").is_normalized()); + /// + /// assert!(!Path::new("abc/../def").is_normalized()); + /// assert!(!Path::new("//abc").is_normalized()); + /// assert!(!Path::new(".//abc").is_normalized()); + /// assert!(!Path::new("//").is_normalized()); + /// assert!(!Path::new("/../abc").is_normalized()); + /// assert!(!Path::new("abc/./def").is_normalized()); + /// assert!(!Path::new("abc/").is_normalized()); + /// assert!(!Path::new("abc/.").is_normalized()); + /// assert!(!Path::new("./").is_normalized()); + /// assert!(!Path::new("/.").is_normalized()); + /// ``` + #[unstable(feature = "normalize_lexically", issue = "134694")] + pub fn is_normalized(&self) -> bool { + use Component::*; + + // TODO: This can be compiled out on platforms that only recognize one separator and can be + // optimized on platforms with two. + // See: https://github.com/rust-lang/libs-team/issues/744 + if self.as_u8_slice().iter().any(|&b| is_sep_byte(b) && b != MAIN_SEPARATOR as u8) { + return false; + } + + let mut components = self.components(); + let Some(first) = components.next() else { + return true; + }; + + if !match first { + Prefix(_) => { + components.skip_while(|c| matches!(c, RootDir)).all(|c| matches!(c, Normal(_))) + } + RootDir | CurDir | Normal(_) => components.all(|c| matches!(c, Normal(_))), + ParentDir => { + components.skip_while(|c| matches!(c, ParentDir)).all(|c| matches!(c, Normal(_))) + } + } { + return false; + } + + // TODO: Checking for the component iterator silently dropping repeated separators or + // current directory components can be done inline with the previous pass and should maybe + // be done without hooking into the iterator internals. + components = self.components(); // restart the iterator + let mut prev = None; + while components.front < State::Body { + prev = components.next().or(prev); + } + + let mut is_consecutive_empty = false; + while !components.path.is_empty() { + // This is how the iterator internally communicates skipping a component + let (len, component) = components.parse_next_component(); + if component.is_some() { + is_consecutive_empty = false; + prev = component; + } else { + if prev != Some(CurDir) || is_consecutive_empty { + return false; + } + is_consecutive_empty = true; + } + components.path = &components.path[len..]; + } + + if let Some(prev) = prev + && !self.as_u8_slice().ends_with(prev.as_os_str().as_encoded_bytes()) + { + return false; + } + + true + } + + /// Normalize a path, including `..` without traversing the filesystem. Any remaining `..` + /// components that can't be normalized are collected at the beginning of the path. Returns + /// [`Cow::Borrowed`] if the path is already normalized, otherwise [`Cow::Owned`] containing + /// the normalized form in a [`PathBuf`]. /// ///