From c6ccb69aae884dcaa76cc9d468d246436fa8fa74 Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Tue, 11 Nov 2025 12:23:17 +0300 Subject: [PATCH 1/8] crate::path::Path improvements --- src/path/mod.rs | 128 +++++++++++++++++++++++++++++++++++++++------- src/path/parts.rs | 36 ++++++++++++- 2 files changed, 145 insertions(+), 19 deletions(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index f8affe8d..98830455 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -17,7 +17,6 @@ //! Path abstraction for Object Storage -use itertools::Itertools; use percent_encoding::percent_decode; use std::fmt::Formatter; #[cfg(not(target_arch = "wasm32"))] @@ -29,9 +28,12 @@ pub const DELIMITER: &str = "/"; /// The path delimiter as a single byte pub const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0]; +/// The path delimiter as a single char +pub const DELIMITER_CHAR: char = DELIMITER_BYTE as char; + mod parts; -pub use parts::{InvalidPart, PathPart}; +pub use parts::{InvalidPart, PathPart, PathParts}; /// Error returned by [`Path::parse`] #[derive(Debug, thiserror::Error)] @@ -157,6 +159,18 @@ pub struct Path { } impl Path { + /// An empty [`Path`] that points to the root of the store, equivalent to `Path::from("/")`. + /// + /// See also [`Path::is_root`]. + /// + /// # Example + /// + /// ``` + /// # use object_store::path::Path; + /// assert_eq!(Path::ROOT, Path::from("/")); + /// ``` + pub const ROOT: Self = Self { raw: String::new() }; + /// Parse a string as a [`Path`], returning a [`Error`] if invalid, /// as defined on the docstring for [`Path`] /// @@ -255,14 +269,53 @@ impl Path { Self::parse(decoded) } - /// Returns the [`PathPart`] of this [`Path`] - pub fn parts(&self) -> impl Iterator> { - self.raw - .split_terminator(DELIMITER) - .map(|s| PathPart { raw: s.into() }) + /// Returns the number of [`PathPart`]s in this [`Path`] + /// + /// This is equivalent to calling `.parts().count()` manually. + /// + /// # Performance + /// + /// This operation is `O(n)`. + #[doc(alias = "len")] + pub fn parts_count(&self) -> usize { + self.raw.split_terminator(DELIMITER).count() + } + + /// True if this [`Path`] points to the root of the store, equivalent to `Path::from("/")`. + /// + /// See also [`Path::root`]. + /// + /// # Example + /// + /// ``` + /// # use object_store::path::Path; + /// assert!(Path::from("/").is_root()); + /// assert!(Path::parse("").unwrap().is_root()); + /// ``` + pub fn is_root(&self) -> bool { + self.raw.is_empty() + } + + /// Returns the [`PathPart`]s of this [`Path`] + pub fn parts(&self) -> PathParts<'_> { + PathParts::new(&self.raw) + } + + /// Returns a copy of this [`Path`] with the last path segment removed + /// + /// Returns `None` if this path has zero segments. + #[doc(alias = "folder")] + pub fn prefix(&self) -> Option { + let (prefix, _filename) = self.raw.rsplit_once(DELIMITER)?; + + Some(Self { + raw: prefix.to_string(), + }) } /// Returns the last path segment containing the filename stored in this [`Path`] + /// + /// Returns `None` only if this path is the root path. pub fn filename(&self) -> Option<&str> { match self.raw.is_empty() { true => None, @@ -285,16 +338,13 @@ impl Path { /// Returns an iterator of the [`PathPart`] of this [`Path`] after `prefix` /// - /// Returns `None` if the prefix does not match + /// Returns `None` if the prefix does not match. pub fn prefix_match(&self, prefix: &Self) -> Option> + '_> { let mut stripped = self.raw.strip_prefix(&prefix.raw)?; if !stripped.is_empty() && !prefix.raw.is_empty() { stripped = stripped.strip_prefix(DELIMITER)?; } - let iter = stripped - .split_terminator(DELIMITER) - .map(|x| PathPart { raw: x.into() }); - Some(iter) + Some(PathParts::new(stripped)) } /// Returns true if this [`Path`] starts with `prefix` @@ -348,13 +398,22 @@ where I: Into>, { fn from_iter>(iter: T) -> Self { - let raw = T::into_iter(iter) - .map(|s| s.into()) - .filter(|s| !s.raw.is_empty()) - .map(|s| s.raw) - .join(DELIMITER); + let mut this = Self::ROOT; + this.extend(iter); + this + } +} - Self { raw } +impl<'a, I: Into>> Extend for Path { + fn extend>(&mut self, iter: T) { + for s in iter.into_iter() { + let s = s.into(); + if s.raw.is_empty() { + continue; + } + self.raw.push(DELIMITER_CHAR); + self.raw.push_str(&s.raw); + } } } @@ -370,6 +429,11 @@ pub(crate) fn absolute_path_to_url(path: impl AsRef) -> Result< mod tests { use super::*; + #[test] + fn delimiter_char_is_forward_slash() { + assert_eq!(DELIMITER_CHAR, '/'); + } + #[test] fn cloud_prefix_with_trailing_delimiter() { // Use case: files exist in object storage named `foo/bar.json` and @@ -469,6 +533,23 @@ mod tests { assert_eq!(Path::default().parts().count(), 0); } + #[test] + fn parts_count() { + assert_eq!(Path::ROOT.parts().count(), Path::ROOT.parts_count()); + + let path = path("foo/bar/baz"); + assert_eq!(path.parts().count(), path.parts_count()); + } + + #[test] + fn prefix_matches_raw_content() { + assert_eq!(Path::ROOT.prefix(), None, "empty path must have no prefix"); + + assert_eq!(path("foo").prefix().unwrap(), Path::ROOT); + assert_eq!(path("foo/bar").prefix().unwrap(), path("foo")); + assert_eq!(path("foo/bar/baz").prefix().unwrap(), path("foo/bar")); + } + #[test] fn prefix_matches() { let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something"]); @@ -611,4 +692,15 @@ mod tests { assert_eq!(c.extension(), None); assert_eq!(d.extension(), Some("qux")); } + + #[test] + fn root_is_root() { + assert!(Path::ROOT.is_root()); + } + + /// Construct a [`Path`] from a raw `&str`, or panic trying. + #[track_caller] + fn path(raw: &str) -> Path { + Path::parse(raw).unwrap() + } } diff --git a/src/path/parts.rs b/src/path/parts.rs index 21705100..8a344cd0 100644 --- a/src/path/parts.rs +++ b/src/path/parts.rs @@ -16,7 +16,11 @@ // under the License. use percent_encoding::{AsciiSet, CONTROLS, percent_encode}; -use std::borrow::Cow; +use std::{ + borrow::Cow, + iter::{self, FusedIterator}, + str::SplitTerminator, +}; use crate::path::DELIMITER_BYTE; @@ -131,6 +135,36 @@ impl AsRef for PathPart<'_> { } } +/// See [`Path::parts`](super::Path::parts) +#[derive(Debug, Clone)] +pub struct PathParts<'a>(iter::Map, fn(&str) -> PathPart<'_>>); + +impl<'a> PathParts<'a> { + /// Create an iterator over the parts of the provided raw [`Path`]. + pub(super) fn new(raw: &'a str) -> Self { + Self( + raw.split_terminator(super::DELIMITER_CHAR) + .map(|s| PathPart { raw: s.into() }), + ) + } +} + +impl<'a> Iterator for PathParts<'a> { + type Item = PathPart<'a>; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl<'a> FusedIterator for PathParts<'a> {} + +impl<'a> DoubleEndedIterator for PathParts<'a> { + fn next_back(&mut self) -> Option { + self.0.next_back() + } +} + #[cfg(test)] mod tests { use super::*; From 1696fe8da757c6d0ae7b5e34dd175a1b65d8d8df Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Thu, 4 Dec 2025 16:26:20 +0300 Subject: [PATCH 2/8] fn prefix -> fn parent --- src/path/mod.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index 98830455..ccc2f1d3 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -304,8 +304,7 @@ impl Path { /// Returns a copy of this [`Path`] with the last path segment removed /// /// Returns `None` if this path has zero segments. - #[doc(alias = "folder")] - pub fn prefix(&self) -> Option { + pub fn parent(&self) -> Option { let (prefix, _filename) = self.raw.rsplit_once(DELIMITER)?; Some(Self { @@ -543,11 +542,11 @@ mod tests { #[test] fn prefix_matches_raw_content() { - assert_eq!(Path::ROOT.prefix(), None, "empty path must have no prefix"); + assert_eq!(Path::ROOT.parent(), None, "empty path must have no prefix"); - assert_eq!(path("foo").prefix().unwrap(), Path::ROOT); - assert_eq!(path("foo/bar").prefix().unwrap(), path("foo")); - assert_eq!(path("foo/bar/baz").prefix().unwrap(), path("foo/bar")); + assert_eq!(path("foo").parent().unwrap(), Path::ROOT); + assert_eq!(path("foo/bar").parent().unwrap(), path("foo")); + assert_eq!(path("foo/bar/baz").parent().unwrap(), path("foo/bar")); } #[test] From ee1bd651039bced554cb7b01ecae3a47d317cbe5 Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Thu, 4 Dec 2025 16:28:59 +0300 Subject: [PATCH 3/8] test: check exact part count --- src/path/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index ccc2f1d3..aed8354e 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -537,7 +537,8 @@ mod tests { assert_eq!(Path::ROOT.parts().count(), Path::ROOT.parts_count()); let path = path("foo/bar/baz"); - assert_eq!(path.parts().count(), path.parts_count()); + assert_eq!(path.parts_count(), 3); + assert_eq!(path.parts_count(), path.parts().count()); } #[test] From bd279bc46e77b9fe0fa3284963b4af27dd649434 Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Thu, 4 Dec 2025 16:46:50 +0300 Subject: [PATCH 4/8] fix: impl Extend must not add / before the first part --- src/path/mod.rs | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index aed8354e..05f2c0dc 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -403,15 +403,35 @@ where } } +/// [`Path`] supports appending [`PathPart`]s of one `Path` to another `Path`. +/// +/// # Examples +/// +/// Suppose Alice is copying Bob's file to her own user directory. +/// We could choose the full path of the new file by taking the original +/// absolute path, making it relative to Bob's home +/// +/// ```rust +/// # use object_store::path::Path; +/// let alice_home = Path::from("Users/alice"); +/// let bob_home = Path::from("Users/bob"); +/// let bob_file = Path::from("Users/bob/documents/file.txt"); +/// +/// let mut alice_file = alice_home; +/// alice_file.extend(bob_file.prefix_match(&bob_home).unwrap()); +/// +/// assert_eq!(alice_file, Path::from("Users/alice/documents/file.txt")); +/// ``` impl<'a, I: Into>> Extend for Path { fn extend>(&mut self, iter: T) { - for s in iter.into_iter() { + for s in iter { let s = s.into(); - if s.raw.is_empty() { - continue; + if !s.raw.is_empty() { + if !self.raw.is_empty() { + self.raw.push(DELIMITER_CHAR); + } + self.raw.push_str(&s.raw); } - self.raw.push(DELIMITER_CHAR); - self.raw.push_str(&s.raw); } } } From 92e0187dd95db4dc0e9d37534648ac1b1ad2b81e Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Thu, 4 Dec 2025 16:56:16 +0300 Subject: [PATCH 5/8] impl IntoIterator for &Path; add second test for impl Extend --- src/path/mod.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/path/mod.rs b/src/path/mod.rs index 05f2c0dc..dfc93b3a 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -297,6 +297,8 @@ impl Path { } /// Returns the [`PathPart`]s of this [`Path`] + /// + /// Equivalent to calling `.into_iter()` on a `&Path`. pub fn parts(&self) -> PathParts<'_> { PathParts::new(&self.raw) } @@ -403,6 +405,16 @@ where } } +/// See also [`Path::parts`] +impl<'a> IntoIterator for &'a Path { + type Item = PathPart<'a>; + type IntoIter = PathParts<'a>; + + fn into_iter(self) -> Self::IntoIter { + PathParts::new(&self.raw) + } +} + /// [`Path`] supports appending [`PathPart`]s of one `Path` to another `Path`. /// /// # Examples @@ -718,6 +730,20 @@ mod tests { assert!(Path::ROOT.is_root()); } + #[test] + fn impl_extend() { + let mut p = Path::ROOT; + + p.extend(&Path::ROOT); + assert_eq!(p, Path::ROOT); + + p.extend(&path("foo/bar")); + assert_eq!(p, path("foo/bar")); + + p.extend(&path("baz/xyz/abc")); + assert_eq!(p, path("foo/bar/baz/xyz/abc")); + } + /// Construct a [`Path`] from a raw `&str`, or panic trying. #[track_caller] fn path(raw: &str) -> Path { From c78e6ec5e2a1f5a9b023b024041a79bc76348bef Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Thu, 4 Dec 2025 17:02:41 +0300 Subject: [PATCH 6/8] test: specifically cover adding one segment --- src/path/mod.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index dfc93b3a..201f455a 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -728,8 +728,10 @@ mod tests { #[test] fn root_is_root() { assert!(Path::ROOT.is_root()); + assert!(Path::ROOT.parts().next().is_none()); } + /// Main test for `impl Extend for Path`, covers most cases. #[test] fn impl_extend() { let mut p = Path::ROOT; @@ -737,11 +739,29 @@ mod tests { p.extend(&Path::ROOT); assert_eq!(p, Path::ROOT); - p.extend(&path("foo/bar")); + p.extend(&path("foo")); + assert_eq!(p, path("foo")); + + p.extend(&path("bar/baz")); + assert_eq!(p, path("foo/bar/baz")); + + p.extend(&path("a/b/c")); + assert_eq!(p, path("foo/bar/baz/a/b/c")); + } + + /// Test for `impl Extend for Path`, specifically covers addition of a single segment. + #[test] + fn impl_extend_for_one_segment() { + let mut p = Path::ROOT; + + p.extend(&path("foo")); + assert_eq!(p, path("foo")); + + p.extend(&path("bar")); assert_eq!(p, path("foo/bar")); - p.extend(&path("baz/xyz/abc")); - assert_eq!(p, path("foo/bar/baz/xyz/abc")); + p.extend(&path("baz")); + assert_eq!(p, path("foo/bar/baz")); } /// Construct a [`Path`] from a raw `&str`, or panic trying. From 693a60c646fe07682c5308a4bd67c6f81d37742f Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Fri, 5 Dec 2025 22:34:39 +0300 Subject: [PATCH 7/8] fix: broken intra-doc links --- src/path/mod.rs | 2 +- src/path/parts.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index 201f455a..c7346679 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -283,7 +283,7 @@ impl Path { /// True if this [`Path`] points to the root of the store, equivalent to `Path::from("/")`. /// - /// See also [`Path::root`]. + /// See also [`Path::ROOT`]. /// /// # Example /// diff --git a/src/path/parts.rs b/src/path/parts.rs index 8a344cd0..5628d3f4 100644 --- a/src/path/parts.rs +++ b/src/path/parts.rs @@ -140,7 +140,7 @@ impl AsRef for PathPart<'_> { pub struct PathParts<'a>(iter::Map, fn(&str) -> PathPart<'_>>); impl<'a> PathParts<'a> { - /// Create an iterator over the parts of the provided raw [`Path`]. + /// Create an iterator over the parts of the provided raw [`Path`](super::Path). pub(super) fn new(raw: &'a str) -> Self { Self( raw.split_terminator(super::DELIMITER_CHAR) From 071a1997efbe275cf06c542b0d2067906c38b82d Mon Sep 17 00:00:00 2001 From: Ruslan Fadeev Date: Fri, 5 Dec 2025 22:40:09 +0300 Subject: [PATCH 8/8] fix: Path::parent returning None for single segment --- src/path/mod.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/path/mod.rs b/src/path/mod.rs index c7346679..e8618db2 100644 --- a/src/path/mod.rs +++ b/src/path/mod.rs @@ -307,7 +307,13 @@ impl Path { /// /// Returns `None` if this path has zero segments. pub fn parent(&self) -> Option { - let (prefix, _filename) = self.raw.rsplit_once(DELIMITER)?; + if self.raw.is_empty() { + return None; + } + + let Some((prefix, _filename)) = self.raw.rsplit_once(DELIMITER) else { + return Some(Self::ROOT); + }; Some(Self { raw: prefix.to_string(), @@ -764,6 +770,14 @@ mod tests { assert_eq!(p, path("foo/bar/baz")); } + #[test] + fn parent() { + assert_eq!(Path::ROOT.parent(), None); + assert_eq!(path("foo").parent(), Some(Path::ROOT)); + assert_eq!(path("foo/bar").parent(), Some(path("foo"))); + assert_eq!(path("foo/bar/baz").parent(), Some(path("foo/bar"))); + } + /// Construct a [`Path`] from a raw `&str`, or panic trying. #[track_caller] fn path(raw: &str) -> Path {