diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..df01be1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,92 @@ +name: ci +on: + pull_request: + branches: + - main + push: + branches: + - main + schedule: + - cron: "00 01 * * *" + +permissions: + contents: read + +jobs: + test: + name: test + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - build: pinned + os: ubuntu-latest + rust: 1.81.0 + - build: pinned-win + os: windows-latest + rust: 1.81.0 + - build: stable + os: ubuntu-latest + rust: stable + - build: beta + os: ubuntu-latest + rust: beta + - build: nightly + os: ubuntu-latest + rust: nightly + - build: macos + os: macos-latest + rust: stable + - build: win-msvc + os: windows-latest + rust: stable + - build: win-gnu + os: windows-latest + rust: stable-x86_64-gnu + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + - run: cargo build --verbose + - if: startsWith(matrix.build, 'pinned-') == false + run: cargo doc --verbose + - if: startsWith(matrix.build, 'pinned-') == false + run: cargo test --verbose + - if: matrix.build == 'nightly' + run: | + set -x + cargo generate-lockfile -Z minimal-versions + cargo build --verbose + cargo test --verbose + + rustfmt: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + components: rustfmt + - name: Check formatting + run: | + cargo fmt -- --check + + clippy: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + components: clippy + - name: Check linting + run: | + cargo clippy --tests -- -D warnings diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 
index 0000000..e22ceb8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "multiglob" +version = "0.1.0" +edition = "2021" +authors = ["Ivan Smirnov "] +license = "MIT OR Apache-2.0" +repository = "https://github.com/aldanor/multiglob" +homepage = "https://github.com/aldanor/multiglob" +documentation = "https://docs.rs/multiglob" +categories = ["filesystem"] +keywords = ["glob", "walk", "pattern", "directory", "recursive"] +readme = "README.md" +resolver = "2" +rust-version = "1.81" + +[dependencies] +globset = "0.4.1" +walkdir = "2.4" + +[target.'cfg(windows)'.dependencies.winapi-util] +version = "0.1" + +[dev-dependencies] +current_dir = "0.1" +insta = "1.43" +pretty_assertions = "1.4" +rstest = "0.25" +tempfile = "3" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..a41b712 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 99 +use_small_heuristics = "Max" diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 0000000..5984e34 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,217 @@ +use std::path::{Path, PathBuf}; + +use globset::Glob; +use walkdir::WalkDir; + +use crate::{cluster::cluster_globs, walk::MultiGlobWalker, GlobError}; + +/// Internal structure for keeping all walkdir/globset options together to pass them around. 
+#[derive(Clone, Copy, Debug)] +pub struct MultiGlobOptions { + pub follow_links: bool, + pub max_depth: usize, + pub max_open: usize, + pub same_file_system: bool, + pub case_insensitive: bool, + pub canonicalize: bool, +} + +impl Default for MultiGlobOptions { + fn default() -> Self { + Self { + follow_links: false, + max_depth: usize::MAX, + max_open: 10, + same_file_system: false, + case_insensitive: false, + canonicalize: false, + } + } +} + +impl MultiGlobOptions { + pub fn configure_walkdir(&self, walkdir: WalkDir) -> WalkDir { + walkdir + .sort_by_file_name() + .follow_links(self.follow_links) + .max_open(self.max_open) + .same_file_system(self.same_file_system) + } +} + +/// A builder to create an iterator over multiple globs from a given base path. +#[derive(Clone, Debug)] +pub struct MultiGlobBuilder { + base: PathBuf, + patterns: Vec, + opts: MultiGlobOptions, +} + +impl MultiGlobBuilder { + /// Construct a new multiglob walker builder from a base directory and a list of patterns. + /// + /// When iterated, the `base` directory will be recursively searched for paths + /// matching `patterns`. + pub fn new(base: B, patterns: P) -> Self + where + B: AsRef, + P: IntoIterator, + S: AsRef, + { + Self { + base: base.as_ref().to_owned(), + patterns: patterns.into_iter().map(|s| s.as_ref().to_owned()).collect(), + opts: MultiGlobOptions::default(), + } + } + + /// Construct a multiglob walker; error that may occur when parsing globs will be propagated. + pub fn build(&self) -> Result { + let (walker, mut errors) = self.build_skip_invalid(); + if !errors.is_empty() { + Err(errors.remove(0)) + } else { + Ok(walker) + } + } + + /// Construct a multiglob walker and skip all invalid globs patterns. + /// + /// Returns list of all glob errors encountered as the second element of the tuple. 
+ /// Note: invalid glob patterns reported in errors will not be the original patterns + pub fn build_skip_invalid(&self) -> (MultiGlobWalker, Vec) { + let mut patterns = self.patterns.clone(); + let mut errors = Vec::new(); + patterns.retain(|p| { + // do this early to try and retain original glob patterns in reported errors + if let Some(err) = Glob::new(p).err() { + errors.push(err); + false + } else { + true + } + }); + let mut walker = MultiGlobWalker::new(self.base.clone(), self.opts); + let glob_groups = cluster_globs(&patterns); + let mut mg_base = self.base.clone(); + if mg_base == PathBuf::new() { + mg_base = ".".into(); + } + for (base, patterns) in glob_groups { + let mut base = mg_base.join(base); + if base == mg_base { + base = mg_base.clone(); + } + walker.add(base, patterns, &mut errors); + } + (walker.rev(), errors) + } + + /// Toggle whether the globs should be matched case insensitively or not. + /// + /// This is disabled by default. + pub fn case_insensitive(mut self, yes: bool) -> Self { + self.opts.case_insensitive = yes; + self + } + + /// Set the maximum depth of all recursive globs (those containing `**`). + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// This will not simply filter the entries of the iterator, but + /// it will actually avoid descending into directories when the depth is + /// exceeded. + /// + /// Note that the depth is counted not from the base directory, but from a point + /// where a recursive pattern is encountered. For example, if maximum depth is 2 + /// and patterns are `../a/**` and `b/**`, then the deepest entries will look + /// like `../a/x/y` and `b/x/y`. + /// + /// By default, there's no max depth limit. + pub fn max_depth(mut self, depth: usize) -> Self { + self.opts.max_depth = depth; + self + } + + /// Follow symbolic links. 
By default, this is disabled. + /// + /// When `yes` is `true`, symbolic links are followed as if they were + /// normal directories and files. If a symbolic link is broken or is + /// involved in a loop, an error is yielded. + /// + /// When enabled, the yielded [`DirEntry`] values represent the target of + /// the link while the path corresponds to the link. See the [`DirEntry`] + /// type for more details. + /// + /// Note, this only affects parts of globs starting from the first glob-like + /// component. For example, in a pattern `a/b/*/c/**` this will only affect + /// the `*/c/**` part of the pattern. + /// + /// [`DirEntry`]: struct.DirEntry.html + pub fn follow_links(mut self, yes: bool) -> Self { + self.opts.follow_links = yes; + self + } + + /// Set the maximum number of simultaneously open file descriptors used + /// by glob walker iterators. + /// + /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set + /// to `1` automatically. If this is not set, then it defaults to some + /// reasonably low number. + /// + /// This setting has no impact on the results yielded by the iterator + /// (even when `n` is `1`). Instead, this setting represents a trade off + /// between scarce resources (file descriptors) and memory. Namely, when + /// the maximum number of file descriptors is reached and a new directory + /// needs to be opened to continue iteration, then a previous directory + /// handle is closed and has its unyielded entries stored in memory. In + /// practice, this is a satisfying trade off because it scales with respect + /// to the *depth* of your file tree. Therefore, low values (even `1`) are + /// acceptable. + /// + /// Note that this value does not impact the number of system calls made by + /// an exhausted iterator. + /// + /// # Platform behavior + /// + /// On Windows, if `follow_links` is enabled, then this limit is not + /// respected. 
In particular, the maximum number of file descriptors opened + /// is proportional to the depth of the directory tree traversed. + pub fn max_open(mut self, mut n: usize) -> Self { + if n == 0 { + n = 1; + } + self.opts.max_open = n; + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the base path. + pub fn same_file_system(mut self, yes: bool) -> Self { + self.opts.same_file_system = yes; + self + } + + /// Canonicalize paths via [`std::fs::canonicalize`] (and deduplicate by canonicalized paths). + /// + /// Without this option, the walker will not be able to tell apart "a/b" and "a/../a/b" + /// and will always return both (however, it will still deduplicate non-canonicalized paths). + /// + /// Notes: + /// - This is not free resource-wise as it retrieves entry metadata and resolves links. + /// - If this option is enabled and entry path cannot be canonicalized, error is returned. + /// - Resulting [`DirEntry`] objects will contain canonicalized paths along with resolved metadata. + /// + /// [`std::fs::canonicalize`]: https://doc.rust-lang.org/std/fs/fn.canonicalize.html + /// [`DirEntry`]: struct.DirEntry.html + pub fn canonicalize(mut self) -> Self { + self.opts.canonicalize = true; + self + } +} diff --git a/src/cluster.rs b/src/cluster.rs new file mode 100644 index 0000000..ea07e87 --- /dev/null +++ b/src/cluster.rs @@ -0,0 +1,378 @@ +use std::{ + collections::BTreeMap, + path::{Component, Components, Path, PathBuf}, +}; + +/// Check if a component of the path looks like it may be a glob pattern. +/// +/// Note: this function is being used when splitting a glob pattern into a long possible +/// base and the glob remainder (scanning through components until we hit the first component +/// for which this function returns true). It is acceptable for this function to return +/// false positives (e.g. 
patterns like 'foo[bar' or 'foo{bar') in which case correctness +/// will not be affected but efficiency might be (because we'll traverse more than we should), +/// however it should not return false negatives. +pub fn is_glob_like(part: Component) -> bool { + matches!(part, Component::Normal(_)) + && part.as_os_str().to_str().is_some_and(crate::util::is_glob_like) +} + +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct GlobParts { + base: PathBuf, + pattern: PathBuf, +} + +/// Split a glob into longest possible base + shortest possible glob pattern. +fn split_glob(pattern: impl AsRef) -> GlobParts { + let pattern: &Path = pattern.as_ref().as_ref(); + + let mut glob = GlobParts::default(); + let mut globbing = false; + let mut last = None; + + for part in pattern.components() { + if let Some(last) = last { + if last != Component::CurDir { + if globbing { + glob.pattern.push(last); + } else { + glob.base.push(last); + } + } + } + if !globbing { + globbing = is_glob_like(part); + } + // we don't know if this part is the last one, defer handling it by one iteration + last = Some(part); + } + + if let Some(last) = last { + // defer handling the last component to prevent draining entire pattern into base + if globbing || matches!(last, Component::Normal(_)) { + glob.pattern.push(last); + } else { + glob.base.push(last); + } + } + glob +} + +/// Classic trie with edges being path components and values being glob patterns. +#[derive(Default, Debug)] +struct Trie<'a> { + children: BTreeMap, Trie<'a>>, + patterns: Vec<&'a Path>, +} + +impl<'a> Trie<'a> { + fn insert(&mut self, mut components: Components<'a>, pattern: &'a Path) { + if let Some(part) = components.next() { + self.children.entry(part).or_default().insert(components, pattern); + } else { + self.patterns.push(pattern); + } + } + + /// Iteratively collects groups of patterns from the Trie (no recursion). 
+ pub fn collect_groups(&self) -> Vec<(PathBuf, Vec)> { + /// Defines the current processing mode for a node on the stack. + enum ModeState { + /// Evaluate if the current node should start a new group or delegate to children. + CollectGroups, + /// Collect patterns for the group currently at the top of `active_group_stack`. + /// The path accumulates the relative path for patterns within the current group. + CollectPatterns(PathBuf), + /// Finalize the group at the top of `active_group_stack` and add it to `out_groups`. + FinalizeGroup, + } + + // The main stack for iterative traversal. Each item includes: + // - A reference to the Trie node to process. + // - The current path context: + // - `CollectGroups` => the prefix that will become the group key if this node is a pivot. + // - `CollectPatterns` => the base path for forming new subgroup keys if a non-normal child is found. + // - `FinalizeGroup` => this path context is the key of the group being finalized. + // - The `ModeState` indicating what action to perform. + let mut stack = vec![(self, PathBuf::new(), ModeState::CollectGroups)]; + + // The final list of (group_key, patterns_list) tuples that will be returned. + let mut out_groups: Vec<(PathBuf, Vec)> = Vec::new(); + + // A stack to manage groups that are currently being built. + // When a pivot node is found (in `CollectGroups`), a new group (group_key, empty_pattern_list) + // is pushed here. `CollectPatterns` adds patterns to the group at the top of this stack. + // `FinalizeGroup` moves the top group from this stack to `out_groups`. + let mut active_group_stack: Vec<(PathBuf, Vec)> = Vec::new(); + + while let Some((node, path_context, mode)) = stack.pop() { + match mode { + ModeState::CollectGroups => { + if node.patterns.is_empty() { + // This node is not a pivot (no patterns directly in it). + // Child nodes might form their own independent groups. + // Push children to the stack to be processed for grouping. 
+ // Iterate children in reverse order because the stack is LIFO, + // ensuring they are processed in their natural BTreeMap order. + for (part, child_node) in node.children.iter().rev() { + stack.push(( + child_node, + path_context.join(part), + ModeState::CollectGroups, + )); + } + } else { + // This node is a pivot point because it contains patterns. + // A new group must be formed here with `path_context` as its key. + + // Add a new group (with an empty pattern list for now) to the active_group_stack. + active_group_stack.push((path_context.clone(), Vec::new())); + + // Schedule the finalization of this new group. This will happen after + // all its patterns (and patterns from normal descendants) are collected. + stack.push(( + node, // node itself doesn't matter here + path_context.clone(), + ModeState::FinalizeGroup, + )); + + // Schedule the collection of patterns for this new group. + stack.push(( + node, + path_context, + ModeState::CollectPatterns(PathBuf::new()), + )); + } + } + + ModeState::CollectPatterns(pattern_prefix) => { + // This state assumes a group is active on `active_group_stack`. + let active_group = active_group_stack.last_mut().unwrap(); + + // Add all patterns from `node` to this active group. Each pattern is prefixed with the + // pattern prefix which represents the path from the group's pivot node down to `node` + // via normal components. + for pattern in &node.patterns { + active_group.1.push(pattern_prefix.join(pattern)); + } + + // Process children of `node`. + for (part, child_node) in node.children.iter().rev() { + let child_path_context = path_context.join(part); + if let Component::Normal(_) = part { + // If the child is connected by a "Normal" component, continue collecting + // patterns for the *current* active group; extend pattern prefix and path context. 
+ stack.push(( + child_node, + child_path_context, + ModeState::CollectPatterns(pattern_prefix.join(part)), + )); + } else { + // If the child is connected by a non-Normal component, it signifies the start + // of a *new*, independent group collection. Push a separate task for this node. + stack.push((child_node, child_path_context, ModeState::CollectGroups)); + } + } + } + + ModeState::FinalizeGroup => { + // The group at the top of `active_group_stack` has had all its patterns collected. + // Move it to the `out_groups`. + out_groups.push(active_group_stack.pop().unwrap()); + } + } + } + + out_groups + } +} + +/// Given a collection of globs, cluster them into (base, globs) groups so that: +/// - base doesn't contain any glob symbols +/// - each directory would only be walked at most once +/// - base of each group is the longest common prefix of globs in the group +pub(crate) fn cluster_globs(patterns: &[impl AsRef]) -> Vec<(PathBuf, Vec)> { + // pub(crate) fn cluster_globs(patterns: &[impl AsRef]) -> Vec<(PathBuf, Vec)> { + // split all globs into base/pattern + let globs: Vec<_> = patterns.iter().map(split_glob).collect(); + + // construct a path trie out of all split globs + let mut trie = Trie::default(); + for glob in &globs { + trie.insert(glob.base.components(), &glob.pattern); + } + + // run LCP-style aggregation of patterns in the trie into groups + let groups = trie.collect_groups(); + + // finally, convert resulting patterns to strings + groups + .into_iter() + .map(|(base, patterns)| { + ( + base, + patterns + .iter() + // NOTE: this unwrap is ok because input patterns are valid utf-8 + .map(|p| p.to_str().unwrap().to_owned()) + .collect(), + ) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + + use super::{cluster_globs, split_glob, GlobParts}; + + use crate::tests::util::windowsify; + + #[test] + fn test_split_glob() { + #[track_caller] + fn check(input: &str, base: &str, pattern: &str, both: bool) { + let 
result = split_glob(input); + let expected = GlobParts { base: base.into(), pattern: pattern.into() }; + assert_eq!(result, expected, "(1): {input:?} != {base:?} + {pattern:?}"); + + if both { + let result = split_glob(windowsify(input)); + let expected = GlobParts { + base: windowsify(base).into(), + pattern: windowsify(pattern).into(), + }; + assert_eq!(result, expected, "(2): {input:?} != {base:?} + {pattern:?}"); + } + } + + check("", "", "", true); + check("a", "", "a", true); + check("a/b", "a", "b", true); + check("a/b/", "a", "b", true); + check("a/.//b/", "a", "b", true); + check("./a/b/c", "a/b", "c", true); + check("c/d/*", "c/d", "*", true); + check("c/d/*/../*", "c/d", "*/../*", true); + check("a/?b/c", "a", "?b/c", true); + check("/a/b/*", "/a/b", "*", true); + check("../x/*", "../x", "*", true); + check("a/{b,c}/d", "a", "{b,c}/d", true); + check("a/[bc]/d", "a", "[bc]/d", true); + check("*", "", "*", true); + check("*/*", "", "*/*", true); + check("..", "..", "", true); + check("/", "/", "", true); + check("/foo/?", "/foo", "?", true); + check("/foo/bar/*", "/foo/bar", "*", true); + + if cfg!(windows) { + check(r"C:\a/b\c", r"C:\a\b", r"c", false); + check(r"C:\a/b\c/*\d/e", r"C:\a\b\c", r"*\d\e", false); + check(r"C:\*", r"C:\", r"*", false); + check(r"\\a\b\c\d", r"\\a\b\c", r"d", false); + check(r"\\a\b\c/*\d/e", r"\\a\b\c", r"*\d\e", false); + check(r"\\a\b\*", r"\\a\b", r"*", false); + check(r"/a\b\c", r"\a\b", r"c", false); + check(r"/a\b/c\*/d\e", r"\a\b\c", r"*\d\e", false); + check(r"/a/*", r"\a", r"*", false); + check(r"./a/*", r"a", r"*", false); + } + } + + #[test] + fn test_cluster_globs() { + #[track_caller] + fn check(input: &[&str], expected: &[(&str, &[&str])]) { + let input = input.iter().map(windowsify).collect::>(); + + let mut result_sorted = cluster_globs(&input); + for (_, patterns) in &mut result_sorted { + patterns.sort_unstable(); + } + result_sorted.sort_unstable(); + + let mut expected_sorted = Vec::new(); + for (base, 
patterns) in expected { + let mut patterns_sorted = Vec::new(); + for pattern in *patterns { + patterns_sorted.push(windowsify(pattern)); + } + patterns_sorted.sort_unstable(); + expected_sorted.push((windowsify(base).into(), patterns_sorted)); + } + expected_sorted.sort_unstable(); + + assert_eq!( + result_sorted, expected_sorted, + "{input:?} != {expected_sorted:?} (got: {result_sorted:?})" + ); + } + + check(&["a/b/*", "a/c/*"], &[("a/b", &["*"]), ("a/c", &["*"])]); + check(&["./a/b/*", "a/c/*"], &[("a/b", &["*"]), ("a/c", &["*"])]); + check(&["/a/b/*", "/a/c/*"], &[("/a/b", &["*"]), ("/a/c", &["*"])]); + check(&["../a/b/*", "../a/c/*"], &[("../a/b", &["*"]), ("../a/c", &["*"])]); + check(&["x/*", "y/*"], &[("x", &["*"]), ("y", &["*"])]); + check(&[], &[]); + check(&["./*", "a/*", "../foo/*.png"], &[("", &["*", "a/*"]), ("../foo", &["*.png"])]); + check( + &["?", "/foo/?", "/foo/bar/*", "../bar/*.png", "../bar/../baz/*.jpg"], + &[ + ("", &["?"]), + ("/foo", &["?", "bar/*"]), + ("../bar", &["*.png"]), + ("../bar/../baz", &["*.jpg"]), + ], + ); + check(&["/abs/path/*"], &[("/abs/path", &["*"])]); + check(&["/abs/*", "rel/*"], &[("/abs", &["*"]), ("rel", &["*"])]); + check(&["a/{b,c}/*", "a/d?/*"], &[("a", &["{b,c}/*", "d?/*"])]); + check( + &[ + "../shared/a/[abc].png", + "../shared/a/b/*", + "../shared/b/c/?x/d", + "docs/important/*.{doc,xls}", + "docs/important/very/*", + ], + &[ + ("../shared/a", &["[abc].png", "b/*"]), + ("../shared/b/c", &["?x/d"]), + ("docs/important", &["*.{doc,xls}", "very/*"]), + ], + ); + check(&["file.txt"], &[("", &["file.txt"])]); + check(&["/"], &[("/", &[""])]); + check(&[".."], &[("..", &[""])]); + check(&["file1.txt", "file2.txt"], &[("", &["file1.txt", "file2.txt"])]); + check(&["a/file1.txt", "a/file2.txt"], &[("a", &["file1.txt", "file2.txt"])]); + check( + &["*", "a/b/*", "a/../c/*.jpg", "a/../c/*.png", "/a/*", "/b/*"], + &[ + ("", &["*", "a/b/*"]), + ("a/../c", &["*.jpg", "*.png"]), + ("/a", &["*"]), + ("/b", &["*"]), + ], + 
); + + if cfg!(windows) { + check( + &[ + r"\\foo\bar\shared/a/[abc].png", + r"\\foo\bar\shared/a/b/*", + r"\\foo\bar/shared/b/c/?x/d", + r"D:\docs\important/*.{doc,xls}", + r"D:\docs/important/very/*", + ], + &[ + (r"\\foo\bar\shared\a", &["[abc].png", r"b\*"]), + (r"\\foo\bar\shared\b\c", &[r"?x\d"]), + (r"D:\docs\important", &["*.{doc,xls}", r"very\*"]), + ], + ); + } + } +} diff --git a/src/dir.rs b/src/dir.rs new file mode 100644 index 0000000..af5a497 --- /dev/null +++ b/src/dir.rs @@ -0,0 +1,242 @@ +use std::{ + ffi::OsStr, + fmt, + fs::{self}, + io, + path::{Path, PathBuf}, +}; + +// note/credits: most of DirEntryPath-related code is borrowed from walkdir with minor adjustments + +/// A directory entry returned by the glob walker. +/// +/// This is the type of value that is yielded from ['MultiGlobWalker'] iterator. +/// this crate. +/// +/// ### Differences with `std::fs::DirEntry` +/// +/// This type mostly mirrors the type by the same name in [`std::fs`]. There +/// are some differences however: +/// +/// * All recursive directory iterators must inspect the entry's type. +/// Therefore, the value is stored and its access is guaranteed to be cheap and +/// successful. +/// * [`path`] and [`file_name`] return borrowed variants. +/// * If [`follow_links`] was enabled in the builder, then all +/// operations except for [`path`] operate on the link target. Otherwise, all +/// operations operate on the symbolic link. +/// +/// [`MultiGlobWalker`]: struct.MultiGlobWalker.html +/// [`std::fs`]: https://doc.rust-lang.org/stable/std/fs/index.html +/// [`path`]: #method.path +/// [`file_name`]: #method.file_name +/// [`follow_links`]: struct.WalkDir.html#method.follow_links +/// [`DirEntryExt`]: trait.DirEntryExt.html +#[derive(Clone)] +pub struct DirEntry { + inner: DirEntryInner, + canonicalized: Option, +} + +#[derive(Clone)] +enum DirEntryInner { + /// The entry was created from following a direct path link. 
+ Path(DirEntryPath), + /// The entry was created by walking over a glob. + Walk(walkdir::DirEntry), +} + +#[derive(Clone)] +struct DirEntryPath { + /// The path as reported by the [`fs::ReadDir`] iterator (even if it's a + /// symbolic link). + /// + /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html + path: PathBuf, + /// The file type. + ty: fs::FileType, + /// Is set when this entry was created from a symbolic link and the user + /// expects the iterator to follow symbolic links. + follow_link: bool, + /// The underlying metadata (Windows only). We store this on Windows + /// because this comes for free while reading a directory. + #[cfg(windows)] + metadata: fs::Metadata, +} + +fn error_with_path(err: io::Error, path: &Path) -> io::Error { + io::Error::new(err.kind(), format!("{err} (path: {})", path.display())) +} + +impl DirEntryPath { + pub fn from_meta(path: PathBuf, metadata: fs::Metadata, follow: bool) -> Self { + Self { + path, + ty: metadata.file_type(), + follow_link: follow, + #[cfg(windows)] + metadata, + } + } + + #[cfg(windows)] + pub fn metadata(&self) -> io::Result { + if self.follow_link { fs::metadata(&self.path) } else { Ok(self.metadata.clone()) } + .map_err(|err| error_with_path(err, &self.path)) + } + + #[cfg(not(windows))] + pub fn metadata(&self) -> io::Result { + if self.follow_link { fs::metadata(&self.path) } else { fs::symlink_metadata(&self.path) } + .map_err(|err| error_with_path(err, &self.path)) + } +} + +impl DirEntry { + /// The full path that this entry represents. + /// + /// The full path is created by joining the parents of this entry up to the + /// root initially given to [`MultiGlobBuilder::new`] with the file name of this + /// entry. + /// + /// Note that this *always* returns the path reported by the underlying + /// directory entry, even when symbolic links are followed. 
To get the + /// target path, use [`path_is_symlink`] to (cheaply) check if this entry + /// corresponds to a symbolic link, and [`std::fs::read_link`] to resolve + /// the target. + /// + /// [`path_is_symlink`]: struct.DirEntry.html#method.path_is_symlink + /// [`MultiGlobBuilder::new`]: struct.MultiGlobBuilder.html#method.new + /// [`std::fs::read_link`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html + pub fn path(&self) -> &Path { + match &self.inner { + DirEntryInner::Path(e) => &e.path, + DirEntryInner::Walk(e) => e.path(), + } + } + + /// The full path that this entry represents. + /// + /// Analogous to [`path`], but moves ownership of the path. + /// + /// [`path`]: struct.DirEntry.html#method.path + pub fn into_path(self) -> PathBuf { + match self.inner { + DirEntryInner::Path(e) => e.path, + DirEntryInner::Walk(e) => e.into_path(), + } + } + + /// Returns `true` if and only if this entry was created from a symbolic + /// link. This is unaffected by the [`follow_links`] setting. + /// + /// When `true`, the value returned by the [`path`] method is a + /// symbolic link name. To get the full target path, you must call + /// [`std::fs::read_link(entry.path())`]. + /// + /// [`path`]: struct.DirEntry.html#method.path + /// [`follow_links`]: struct.MultiGlobBuilder.html#method.follow_links + /// [`std::fs::read_link(entry.path())`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html + pub fn path_is_symlink(&self) -> bool { + match &self.inner { + DirEntryInner::Path(e) => e.ty.is_symlink() || e.follow_link, + DirEntryInner::Walk(e) => e.path_is_symlink(), + } + } + + /// Return the metadata for the file that this entry points to. + /// + /// This will follow symbolic links if and only if the [`MultiGlobBuilder`] value + /// has [`follow_links`] enabled. + /// + /// # Platform behavior + /// + /// This always calls [`std::fs::symlink_metadata`]. 
+ /// + /// If this entry is a symbolic link and [`follow_links`] is enabled, then + /// [`std::fs::metadata`] is called instead. + /// + /// # Errors + /// + /// Similar to [`std::fs::metadata`], returns errors for path values that + /// the program does not have permissions to access or if the path does not + /// exist. + /// + /// [`MultiGlobBuilder`]: struct.MultiGlobBuilder.html + /// [`follow_links`]: struct.MultiGlobBuilder.html#method.follow_links + /// [`std::fs::metadata`]: https://doc.rust-lang.org/std/fs/fn.metadata.html + /// [`std::fs::symlink_metadata`]: https://doc.rust-lang.org/stable/std/fs/fn.symlink_metadata.html + pub fn metadata(&self) -> io::Result { + Ok(match &self.inner { + DirEntryInner::Path(e) => e.metadata()?, + DirEntryInner::Walk(e) => e.metadata()?, + }) + } + + /// Return the file type for the file that this entry points to. + /// + /// If this is a symbolic link and [`follow_links`] is `true`, then this + /// returns the type of the target. + /// + /// This never makes any system calls. + /// + /// [`follow_links`]: struct.MultiGlobBuilder.html#method.follow_links + pub fn file_type(&self) -> fs::FileType { + match &self.inner { + DirEntryInner::Path(e) => e.ty, + DirEntryInner::Walk(e) => e.file_type(), + } + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + let path = self.path(); + path.file_name().unwrap_or(path.as_os_str()) + } + + /// Return canonicalized version of the path, resolving all symlinks. + /// + /// This operation does query file metadata and resolves symlinks so it is + /// not free and may fail, unless the glob walker was initialized with + /// ['canonicalize'] option -- in which case it is free and will never fail. 
+ /// + /// [`canonicalize`]: struct.MultiGlobBuilder.html#method.canonicalize + /// [`std::fs::metadata`]: https://doc.rust-lang.org/std/fs/fn.metadata.html + pub fn canonicalized(&self) -> io::Result { + match self.canonicalized { + Some(ref path) => Ok(path.clone()), + None => fs::canonicalize(self.path()), + } + } + + pub(crate) fn from_meta(path: PathBuf, metadata: fs::Metadata, follow: bool) -> Self { + Self { + inner: DirEntryInner::Path(DirEntryPath::from_meta(path, metadata, follow)), + canonicalized: None, + } + } + + pub(crate) fn from_walk(entry: walkdir::DirEntry) -> Self { + Self { inner: DirEntryInner::Walk(entry), canonicalized: None } + } + + pub(crate) fn into_canonicalized(self) -> io::Result { + let path = fs::canonicalize(self.path())?; + Ok(Self { canonicalized: Some(path), ..self }) + } +} + +impl From for DirEntry { + fn from(entry: walkdir::DirEntry) -> Self { + Self { inner: DirEntryInner::Walk(entry), canonicalized: None } + } +} + +impl fmt::Debug for DirEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DirEntry({:?})", self.path()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..b0e2ecf --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,12 @@ +mod builder; +mod cluster; +mod dir; +mod util; +mod walk; + +#[cfg(test)] +mod tests; + +pub use globset::Error as GlobError; + +pub use crate::{builder::MultiGlobBuilder, dir::DirEntry, walk::MultiGlobWalker}; diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..bdd448d --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,4 @@ +mod test_recursive; +mod test_walk; + +pub mod util; diff --git a/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@cnode.snap b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@cnode.snap new file mode 100644 index 0000000..d74e2bd --- /dev/null +++ b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@cnode.snap @@ -0,0 +1,108 @@ +--- +source: 
src/walk.rs +expression: "&format!(\"{cnode:#?}\")" +--- +Path { + paths: [ + "..", + "/", + "foo", + "x", + ], + destinations: [ + Path { + paths: [ + "..", + "x", + ], + destinations: [ + Path { + paths: [ + "a", + ], + destinations: [ + Terminal, + ], + }, + Glob { + globset: "..", + recursive: true, + destinations: [ + Terminal, + Terminal, + Terminal, + Terminal, + ], + }, + ], + }, + Path { + paths: [ + "home", + "var", + ], + destinations: [ + Path { + paths: [ + "user", + ], + destinations: [ + Terminal, + ], + }, + Path { + paths: [ + "folders", + ], + destinations: [ + Glob[T] { + globset: "..", + recursive: false, + destinations: [ + Terminal, + Path { + paths: [ + "2", + ], + destinations: [ + Terminal, + ], + }, + ], + }, + ], + }, + ], + }, + Path { + paths: [ + "bar", + ], + destinations: [ + Path[T] { + paths: [ + "..", + ], + destinations: [ + Path { + paths: [ + "z", + ], + destinations: [ + Terminal, + ], + }, + ], + }, + ], + }, + Path { + paths: [ + "y", + ], + destinations: [ + Terminal, + ], + }, + ], +} diff --git a/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@node.snap b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@node.snap new file mode 100644 index 0000000..53a3aaf --- /dev/null +++ b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_posix@node.snap @@ -0,0 +1,40 @@ +--- +source: src/walk.rs +expression: "&format!(\"{node:#?}\")" +--- +Path { + ..: Path { + ..: Path { + a: Terminal, + }, + x: Walk { + **: Terminal, + **/y: Terminal, + **/z/*: Terminal, + y: Terminal, + }, + }, + /: Path { + home: Path { + user: Terminal, + }, + var: Path { + folders: Glob[T] { + *.doc: Terminal, + 1: Path { + 2: Terminal, + }, + }, + }, + }, + foo: Path { + bar: Path[T] { + ..: Path { + z: Terminal, + }, + }, + }, + x: Path { + y: Terminal, + }, +} diff --git a/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@cnode.snap 
b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@cnode.snap new file mode 100644 index 0000000..8c2f0a8 --- /dev/null +++ b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@cnode.snap @@ -0,0 +1,150 @@ +--- +source: src/walk.rs +expression: "&format!(\"{cnode:#?}\")" +--- +Path { + paths: [ + "..", + "C:\\", + "\\", + "\\\\unc\\share\\", + "foo", + "x", + ], + destinations: [ + Path { + paths: [ + "..", + "x", + ], + destinations: [ + Path { + paths: [ + "a", + ], + destinations: [ + Terminal, + ], + }, + Glob { + globset: "..", + recursive: true, + destinations: [ + Terminal, + Terminal, + Terminal, + Terminal, + ], + }, + ], + }, + Path { + paths: [ + "var", + ], + destinations: [ + Path { + paths: [ + "folders", + ], + destinations: [ + Glob { + globset: "..", + recursive: false, + destinations: [ + Terminal, + Glob { + globset: "..", + recursive: false, + destinations: [ + Terminal, + ], + }, + ], + }, + ], + }, + ], + }, + Path { + paths: [ + "var", + ], + destinations: [ + Path { + paths: [ + "folders", + ], + destinations: [ + Path[T] { + paths: [ + "1", + ], + destinations: [ + Path { + paths: [ + "2", + ], + destinations: [ + Terminal, + ], + }, + ], + }, + ], + }, + ], + }, + Path { + paths: [ + "foo", + ], + destinations: [ + Glob { + globset: "..", + recursive: false, + destinations: [ + Glob { + globset: "..", + recursive: false, + destinations: [ + Terminal, + ], + }, + Terminal, + ], + }, + ], + }, + Path { + paths: [ + "bar", + ], + destinations: [ + Path[T] { + paths: [ + "..", + ], + destinations: [ + Path { + paths: [ + "z", + ], + destinations: [ + Terminal, + ], + }, + ], + }, + ], + }, + Path { + paths: [ + "y", + ], + destinations: [ + Terminal, + ], + }, + ], +} diff --git a/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@node.snap b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@node.snap new file mode 100644 index 0000000..1325d4d --- /dev/null +++ 
b/src/tests/snapshots/multiglob__walk__tests__walk_plan_node_win@node.snap @@ -0,0 +1,54 @@ +--- +source: src/walk.rs +expression: "&format!(\"{node:#?}\")" +--- +Path { + ..: Path { + ..: Path { + a: Terminal, + }, + x: Walk { + **: Terminal, + **\y: Terminal, + **\z\*: Terminal, + y: Terminal, + }, + }, + C:\: Path { + var: Path { + folders: Glob { + *.doc: Terminal, + secret: Glob { + *.txt: Terminal, + }, + }, + }, + }, + \: Path { + var: Path { + folders: Path[T] { + 1: Path { + 2: Terminal, + }, + }, + }, + }, + \\unc\share\: Path { + foo: Glob { + *: Glob { + *: Terminal, + }, + [ab].txt: Terminal, + }, + }, + foo: Path { + bar: Path[T] { + ..: Path { + z: Terminal, + }, + }, + }, + x: Path { + y: Terminal, + }, +} diff --git a/src/tests/test_recursive.rs b/src/tests/test_recursive.rs new file mode 100644 index 0000000..2e5b413 --- /dev/null +++ b/src/tests/test_recursive.rs @@ -0,0 +1,125 @@ +use std::path::Path; + +use current_dir::Cwd; +use pretty_assertions::assert_eq; +use rstest::rstest; +use walkdir::WalkDir; + +use crate::{DirEntry, MultiGlobBuilder, MultiGlobWalker}; + +use super::util::{Dir, RecursiveResults, Result}; + +#[track_caller] +fn assert_ent_eq(a: &DirEntry, b: &DirEntry) { + let cmp = |d: &DirEntry| { + let md = d.metadata().ok(); + ( + d.path().to_path_buf(), + d.file_type(), + d.path_is_symlink(), + md.as_ref().map(|md| md.file_type()), + #[cfg(not(windows))] + md.as_ref().and_then(|md| md.modified().ok()), + ) + }; + assert_eq!(cmp(a), cmp(b)); +} + +#[track_caller] +fn assert_mg_eq_wd(mg: MultiGlobWalker, wd: WalkDir) { + let ents_mg = RecursiveResults::collect(mg); + let ents_wd = RecursiveResults::collect(wd); + ents_mg.assert_no_errors(); + ents_wd.assert_no_errors(); + assert_eq!(ents_mg.sorted_paths(), ents_wd.sorted_paths()); + for (mg, wd) in ents_mg.sorted_ents().into_iter().zip(ents_wd.sorted_ents()) { + assert_ent_eq(&mg, &wd); + } +} + +fn setup_dir_with_syms() -> Dir { + let dir = Dir::tmp(); + let base = "base/x/y"; + 
dir.mkdirp(base); + dir.mkdirp("a/b"); + dir.symlink_dir("a", format!("{base}/asym")); + dir.symlink_dir("a/b", "a/bsym"); + dir.touch("a/b/c"); + dir +} + +#[rstest] +fn test_double_star_at_root( + #[values("base/x/y", "base/x/y/asym")] base: &str, + #[values(false, true)] follow_links: bool, +) -> Result<()> { + let dir = setup_dir_with_syms(); + let base = dir.path().join(base); + assert_mg_eq_wd( + MultiGlobBuilder::new(&base, ["**"]).follow_links(follow_links).build().unwrap(), + WalkDir::new(base).follow_links(follow_links), + ); + Ok(()) +} + +#[rstest] +fn test_double_star_at_root_rel( + #[values("../y", "../y/asym")] base: &str, + #[values(false, true)] follow_links: bool, +) -> Result<()> { + let dir = setup_dir_with_syms(); + let mut cwd = Cwd::mutex().lock().unwrap(); + cwd.set(dir.path().join("base/x/y")).unwrap(); + assert_mg_eq_wd( + MultiGlobBuilder::new(base, ["**"]).follow_links(follow_links).build().unwrap(), + WalkDir::new(base).follow_links(follow_links), + ); + Ok(()) +} + +#[rstest] +fn test_double_star_at_path( + #[values("x/y", "x/y/asym", "x/y/asym/b")] path: &str, + #[values(false, true)] follow_links: bool, +) -> Result<()> { + let dir = setup_dir_with_syms(); + let base = dir.path().join("base"); + assert_mg_eq_wd( + MultiGlobBuilder::new(&base, [format!("{path}/**")]) + .follow_links(follow_links) + .build() + .unwrap(), + WalkDir::new(base.join(path)).follow_links(follow_links), + ); + Ok(()) +} + +#[rstest] +fn test_double_star_at_path_rel( + #[values("../y", "../y/asym")] path: &str, + #[values(false, true)] follow_links: bool, +) -> Result<()> { + let dir = setup_dir_with_syms(); + let mut cwd = Cwd::mutex().lock().unwrap(); + cwd.set(dir.path().join("base/x")).unwrap(); + let base = Path::new("y"); + assert_mg_eq_wd( + MultiGlobBuilder::new(base, [format!("{path}/**")]) + .follow_links(follow_links) + .build() + .unwrap(), + WalkDir::new(base.join(path)).follow_links(follow_links), + ); + Ok(()) +} + +#[rstest] +fn 
test_double_star_with_max_depth() -> Result<()> { + let dir = setup_dir_with_syms(); + let base = dir.path().join("base"); + assert_mg_eq_wd( + MultiGlobBuilder::new(&base, ["x/**"]).follow_links(true).max_depth(2).build().unwrap(), + WalkDir::new(base.join("x")).follow_links(true).max_depth(2), + ); + Ok(()) +} diff --git a/src/tests/test_walk.rs b/src/tests/test_walk.rs new file mode 100644 index 0000000..801ed3f --- /dev/null +++ b/src/tests/test_walk.rs @@ -0,0 +1,332 @@ +use std::path::{Path, PathBuf}; + +use current_dir::Cwd; +use pretty_assertions::assert_eq; + +use crate::MultiGlobBuilder; + +use super::util::{Dir, RecursiveResults}; + +fn setup_dir_with_syms() -> Dir { + let dir = Dir::tmp(); + let base = "base/x"; + dir.mkdirp(base); + dir.mkdirp("a/b"); + dir.symlink_dir("a", format!("{base}/asym")); + dir.symlink_dir("a/b", "a/bsym"); + dir.touch("a/b/c"); + dir.touch(format!("{base}/d.1")); + dir.touch(format!("{base}/d.2")); + dir.touch(format!("{base}/d.3")); + dir +} + +fn mg_collect_no_err(base: B, patterns: P) -> RecursiveResults +where + B: AsRef, + P: IntoIterator, + S: AsRef, +{ + mg_collect_custom(base, patterns, |x| x) +} + +fn mg_collect_custom( + base: B, + patterns: P, + build: impl Fn(MultiGlobBuilder) -> MultiGlobBuilder, +) -> RecursiveResults +where + B: AsRef, + P: IntoIterator, + S: AsRef, +{ + RecursiveResults::collect(build(MultiGlobBuilder::new(base, patterns)).build().unwrap()) +} + +#[test] +fn test_walk_missing() { + let dir = setup_dir_with_syms(); + let p = dir.path(); + + let res = mg_collect_no_err(p.join("base/x"), ["", "asym", "wrong"]); + assert_eq!(res.sorted_paths(), vec![p.join("base/x"), p.join("base/x/asym")]); + + let res = mg_collect_no_err(p.join("base/xyz"), ["", "asym", "wrong"]); + assert_eq!(res.sorted_paths(), Vec::::new()); + + let res = mg_collect_no_err("nope", ["**"]); + assert_eq!(res.sorted_paths(), Vec::::new()); + + let res = mg_collect_no_err("nope", ["*"]); + assert_eq!(res.sorted_paths(), 
Vec::::new()); + + let res = mg_collect_no_err("../nope", ["*"]); + assert_eq!(res.sorted_paths(), Vec::::new()); +} + +#[test] +fn test_walk_path() { + let dir = setup_dir_with_syms(); + let p = dir.path(); + + let res = mg_collect_no_err(p.join("base/x"), &[] as &[&str]); + assert_eq!(res.sorted_paths(), Vec::::new()); + + let res = mg_collect_no_err(p.join("a"), ["b"]); + assert_eq!(res.sorted_paths(), vec![p.join("a/b")]); + + let res = mg_collect_no_err(p.join("a"), ["b/c"]); + assert_eq!(res.sorted_paths(), vec![p.join("a/b/c")]); + + let res = mg_collect_no_err(p.join("a"), ["b", "b/c"]); + assert_eq!(res.sorted_paths(), vec![p.join("a/b"), p.join("a/b/c")]); + + let res = mg_collect_no_err(p.join("base/x"), ["."]); + assert_eq!(res.sorted_paths(), vec![p.join("base/x")]); + + let res = mg_collect_no_err(p.join("base/x"), [""]); + assert_eq!(res.sorted_paths(), vec![p.join("base/x")]); +} + +#[test] +fn test_walk_glob() { + let dir = setup_dir_with_syms(); + let p = dir.path(); + + let res = mg_collect_no_err(p.join("base/x"), ["a*"]); + assert_eq!(res.sorted_paths(), vec![p.join("base/x/asym")]); + + let res = mg_collect_no_err(p.join("base/x"), ["d.{1,2}", "asym"]); + assert_eq!( + res.sorted_paths(), + vec![p.join("base/x/asym"), p.join("base/x/d.1"), p.join("base/x/d.2")] + ); + + let res = mg_collect_no_err(p.join("base/x"), ["d.[12]", "asym"]); + assert_eq!( + res.sorted_paths(), + vec![p.join("base/x/asym"), p.join("base/x/d.1"), p.join("base/x/d.2")] + ); + + let res = mg_collect_no_err(p.join("base/x"), ["d.{1,2}", "."]); + assert_eq!( + res.sorted_paths(), + vec![p.join("base/x"), p.join("base/x/d.1"), p.join("base/x/d.2")] + ); +} + +#[test] +fn test_walk_rel() { + let dir = setup_dir_with_syms(); + let p = dir.path(); + + let mut cwd = Cwd::mutex().lock().unwrap(); + cwd.set(p.join("base/x")).unwrap(); + + let res = mg_collect_no_err("", &[] as &[&str]); + assert_eq!(res.sorted_paths(), Vec::::new()); + + for b in ["", "."] { + for p in ["", "."] 
{ + let res = mg_collect_no_err(b, [p]); + assert_eq!(res.sorted_paths(), vec![PathBuf::from(".")]); + } + } + + let res = mg_collect_no_err(".", ["d.1"]); + assert_eq!(res.sorted_paths(), vec![PathBuf::from("./d.1")]); + + let res = mg_collect_no_err(".", ["d.[12]", ""]); + assert_eq!( + res.sorted_paths(), + vec![PathBuf::from("."), PathBuf::from("./d.1"), PathBuf::from("./d.2")] + ); + + let res = mg_collect_no_err("..", [""]); + assert_eq!(res.sorted_paths(), vec![PathBuf::from("..")]); + + let res = mg_collect_no_err(".", [".."]); + assert_eq!(res.sorted_paths(), vec![PathBuf::from("./..")]); + + let res = mg_collect_no_err("", [".."]); + assert_eq!(res.sorted_paths(), vec![PathBuf::from("./..")]); +} + +#[test] +fn test_walk_loop() { + let dir = Dir::tmp(); + dir.mkdirp("x/base/a/b"); + dir.symlink_dir("x", "x/base/a/b/c"); + let p = dir.path(); + + let res = mg_collect_no_err(p.join("x/base"), ["**"]); + assert_eq!( + res.sorted_paths(), + vec![p.join("x/base"), p.join("x/base/a"), p.join("x/base/a/b"), p.join("x/base/a/b/c")] + ); + + let res = mg_collect_custom(p.join("x/base"), ["**"], |b| b.follow_links(true)); + assert_eq!(res.errs().len(), 1); + // walkdir behaviour + assert_eq!( + res.sorted_paths(), + vec![ + p.join("x/base"), + p.join("x/base/a"), + p.join("x/base/a/b"), + p.join("x/base/a/b/c"), + p.join("x/base/a/b/c/base"), + p.join("x/base/a/b/c/base/a"), + p.join("x/base/a/b/c/base/a/b") + ] + ); +} + +#[test] +fn test_glob_parent_dir() { + let dir = Dir::tmp(); + dir.mkdirp("a/b"); + dir.mkdirp("a/c"); + let p = dir.path(); + + let res = mg_collect_no_err(p, ["*"]); + assert_eq!(res.sorted_paths(), vec![p.join("a")]); + let res = mg_collect_no_err(p.join("a"), ["*"]); + assert_eq!(res.sorted_paths(), vec![p.join("a/b"), p.join("a/c")]); + let res = mg_collect_no_err(p.join("a"), ["*", "."]); + assert_eq!(res.sorted_paths(), vec![p.join("a"), p.join("a/b"), p.join("a/c")]); + let res = mg_collect_no_err(p.join("a"), ["*", "**"]); + 
assert_eq!(res.sorted_paths(), vec![p.join("a"), p.join("a/b"), p.join("a/c")]); +} + +#[test] +fn test_case_sensitive() { + let dir = Dir::tmp(); + dir.mkdirp("a/B"); + dir.touch("a/B/d"); + let p = dir.path(); + + let res = mg_collect_custom(p, ["a/**"], |b| b); + assert_eq!(res.sorted_paths(), vec![p.join("a"), p.join("a/B"), p.join("a/B/d")]); + let res = mg_collect_custom(p, ["a/{b}/*"], |b| b); + assert_eq!(res.sorted_paths(), Vec::::new()); + let res = mg_collect_custom(p, ["a/{b}/*"], |b| b.case_insensitive(false)); + assert_eq!(res.sorted_paths(), Vec::::new()); + let res = mg_collect_custom(p, ["a/{b}/*"], |b| b.case_insensitive(true)); + assert_eq!(res.sorted_paths(), vec![p.join("a/B/d")]); +} + +#[test] +fn test_symlink_file() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.touch("a/b"); + dir.symlink_file("a/b", "a/c"); + let p = dir.path(); + + let res = mg_collect_no_err(p, ["a/*"]); + assert_eq!(res.sorted_paths(), vec![p.join("a/b"), p.join("a/c")]); + assert!(!res.sorted_ents()[0].path_is_symlink()); + assert!(res.sorted_ents()[0].file_type().is_file()); + assert!(res.sorted_ents()[1].path_is_symlink()); + assert!(!res.sorted_ents()[1].file_type().is_file()); + + let res = mg_collect_custom(p, ["a/*"], |b| b.follow_links(true)); + assert_eq!(res.sorted_paths(), vec![p.join("a/b"), p.join("a/c")]); + assert!(!res.sorted_ents()[0].path_is_symlink()); + assert!(res.sorted_ents()[0].file_type().is_file()); + assert!(res.sorted_ents()[1].path_is_symlink()); + assert!(res.sorted_ents()[1].file_type().is_file()); +} + +#[test] +fn test_invalid_glob() { + let dir = Dir::tmp(); + dir.mkdirp("a/x"); + dir.touch("a/x/b"); + let p = dir.path(); + + let b = MultiGlobBuilder::new(p.join("a"), ["x/*", "y/{", "z/["]); + + let err = b.build().err().unwrap(); + assert_eq!(err.glob().unwrap(), "y/{"); + + let (walker, errors) = b.build_skip_invalid(); + assert_eq!(errors.len(), 2); + assert_eq!(errors[0].glob().unwrap(), "y/{"); + 
assert_eq!(errors[1].glob().unwrap(), "z/["); + assert_eq!( + walker.map(|e| e.unwrap().path().to_owned()).collect::>(), + vec![p.join("a/x/b")] + ); +} + +#[test] +fn test_bigger_walk() { + let dir = Dir::tmp(); + dir.mkdirp("a/b/c"); + dir.touch("a/b/c/x.doc"); + dir.touch("a/b/c/y.doc"); + dir.touch("a/b/c/z1.txt"); + dir.touch("a/b/c/z2.txt"); + dir.touch("a/b/c/z3.txt"); + dir.mkdirp("x/y/b/c/d"); + dir.touch("x/y/b/c/d/a1.doc"); + dir.touch("x/y/b/c/d/a2.doc"); + dir.touch("x/y/b/c/d/a3.doc"); + let p = dir.path(); + + let res = mg_collect_no_err( + p.join("a"), + [ + ".", + "..", + "../x/**/*[13].d*", + "../**/*/c", + "b/*", + &p.join("a/b/*/z{1,3}.*").display().to_string(), + "b/c/../*/y*", + ], + ); + assert_eq!( + res.sorted_paths(), + vec![ + p.join("a"), + p.join("a/.."), + p.join("a/../a/b/c"), + p.join("a/../x/y/b/c"), + p.join("a/../x/y/b/c/d/a1.doc"), + p.join("a/../x/y/b/c/d/a3.doc"), + p.join("a/b/c"), // NOTE: it's the same path as before but we can't tell that unless we query metadata + p.join("a/b/c/../c/y.doc"), + p.join("a/b/c/z1.txt"), + p.join("a/b/c/z3.txt"), + ] + ); +} + +#[test] +fn test_canonicalized() { + let dir = Dir::tmp(); + dir.mkdirp("a/b/c"); + dir.touch("a/b/c/d"); + let p = dir.path(); + + let patterns = [ + &p.join("a/b").display().to_string(), + &p.join("a/b/c").display().to_string(), + "a/*/c", + "a/*", + "../a/b", + "../a/b/c", + "../*/b", + "a/*/../b/c", + ]; + let res = mg_collect_no_err(p.join("a"), patterns); + assert_eq!( + res.sorted_paths(), + vec![p.join("a/../a/b"), p.join("a/../a/b/c"), p.join("a/b"), p.join("a/b/c"),] + ); + let res = mg_collect_custom(p.join("a"), patterns, |b| b.canonicalize()); + assert_eq!(res.sorted_paths(), vec![p.join("a/b"), p.join("a/b/c")]); +} diff --git a/src/tests/util.rs b/src/tests/util.rs new file mode 100644 index 0000000..dbb01b3 --- /dev/null +++ b/src/tests/util.rs @@ -0,0 +1,187 @@ +use std::error; +use std::fs::{self, File}; +use std::io; +use std::path::{Path, PathBuf}; 
+use std::result; + +use crate::DirEntry; + +type Error = std::io::Error; + +// note/credits: some of this is borrowed from walkdir crate's tests + +/// Create an error from a format!-like syntax. +#[macro_export] +macro_rules! err { + ($($tt:tt)*) => { + Box::::from(format!($($tt)*)) + } +} + +pub fn windowsify(path: impl AsRef) -> String { + // only works for path strings themselves (i.e. not strings containing paths) + let mut path = path.as_ref().to_owned(); + if cfg!(windows) { + if path.starts_with("/") { + path = path.replacen('/', "C:\\", 1); + } + path = path.replace('/', "\\"); + } + path +} + +/// A convenient result type alias. +pub type Result = result::Result>; + +/// The result of running a recursive directory iterator on a single directory. +#[derive(Debug)] +pub struct RecursiveResults { + ents: Vec, + errs: Vec, +} + +impl RecursiveResults { + /// Run the given iterator and return the result as a distinct collection + /// of directory entries and errors. + pub fn collect(it: I) -> Self + where + I: IntoIterator>, + D: Into, + E: Into, + { + let mut results = Self { ents: vec![], errs: vec![] }; + for result in it { + match result { + Ok(ent) => results.ents.push(ent.into()), + Err(err) => results.errs.push(err.into()), + } + } + results + } + + /// Return all of the errors encountered during traversal. + pub fn errs(&self) -> &[Error] { + &self.errs + } + + /// Assert that no errors have occurred. + #[track_caller] + pub fn assert_no_errors(&self) { + assert!(self.errs.is_empty(), "expected to find no errors, but found: {:?}", self.errs); + } + + /// Return all the successfully retrieved directory entries, sorted + /// lexicographically by their full file path. + pub fn sorted_ents(&self) -> Vec { + let mut ents = self.ents.clone(); + ents.sort_by(|e1, e2| e1.path().cmp(e2.path())); + ents + } + + /// Return all paths from all successfully retrieved directory entries, + /// sorted lexicographically.
+ /// + /// This does not include paths that correspond to an error. + pub fn sorted_paths(&self) -> Vec { + self.sorted_ents().into_iter().map(|d| d.into_path()).collect() + } +} + +/// A helper for managing a directory in which to run tests. +/// +/// When manipulating paths within this directory, paths are interpreted +/// relative to this directory. +#[derive(Debug)] +pub struct Dir { + dir: tempfile::TempDir, +} + +impl Dir { + /// Create a new empty temporary directory. + pub fn tmp() -> Dir { + let dir = tempfile::TempDir::new().unwrap(); + Dir { dir } + } + + /// Return the path to this directory. + pub fn path(&self) -> &Path { + self.dir.path() + } + + /// Return a path joined to the path to this directory. + pub fn join>(&self, path: P) -> PathBuf { + self.path().join(path) + } + + /// Create a directory at the given path, while creating all intermediate + /// directories as needed. + pub fn mkdirp>(&self, path: P) { + let full = self.join(path); + fs::create_dir_all(&full) + .map_err(|e| err!("failed to create directory {}: {}", full.display(), e)) + .unwrap(); + } + + /// Create an empty file at the given path. All ancestor directories must + /// already exist. + pub fn touch>(&self, path: P) { + let full = self.join(path); + File::create(&full) + .map_err(|e| err!("failed to create file {}: {}", full.display(), e)) + .unwrap(); + } + + /// Create a file symlink to the given src with the given link name.
+ pub fn symlink_file, P2: AsRef>(&self, src: P1, link_name: P2) { + #[cfg(windows)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::windows::fs::symlink_file; + symlink_file(src, link_name) + } + + #[cfg(unix)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::unix::fs::symlink; + symlink(src, link_name) + } + + let (src, link_name) = (self.join(src), self.join(link_name)); + imp(&src, &link_name) + .map_err(|e| { + err!( + "failed to symlink file {} with target {}: {}", + src.display(), + link_name.display(), + e + ) + }) + .unwrap() + } + + /// Create a directory symlink to the given src with the given link name. + pub fn symlink_dir, P2: AsRef>(&self, src: P1, link_name: P2) { + #[cfg(windows)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::windows::fs::symlink_dir; + symlink_dir(src, link_name) + } + + #[cfg(unix)] + fn imp(src: &Path, link_name: &Path) -> io::Result<()> { + use std::os::unix::fs::symlink; + symlink(src, link_name) + } + + let (src, link_name) = (self.join(src), self.join(link_name)); + imp(&src, &link_name) + .map_err(|e| { + err!( + "failed to symlink directory {} with target {}: {}", + src.display(), + link_name.display(), + e + ) + }) + .unwrap() + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..93bb639 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,39 @@ +use std::{io, path::Path}; + +/// Check if a component of a path looks like it may be a glob pattern. +/// +/// Note: this function is being used when splitting a glob pattern into the longest possible +/// base and the glob remainder (scanning through components until we hit the first component +/// for which this function returns true). It is acceptable for this function to return +/// false positives (e.g.
patterns like 'foo[bar' or 'foo{bar') in which case correctness +/// will not be affected but efficiency might be (because we'll traverse more than we should), +/// however it should not return false negatives. +pub fn is_glob_like(part: &str) -> bool { + ["*", "{", "}", "?", "[", "]"].into_iter().any(|c| part.contains(c)) +} + +#[cfg(unix)] +pub fn device_num>(path: P) -> io::Result { + // borrowed from walkdir crate + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + +#[cfg(windows)] +pub fn device_num>(path: P) -> io::Result { + // borrowed from walkdir crate + use winapi_util::{file, Handle}; + + let h = Handle::from_path_any(path)?; + file::information(h).map(|info| info.volume_serial_number()) +} + +#[cfg(not(any(unix, windows)))] +pub fn device_num>(_: P) -> io::Result { + // borrowed from walkdir crate + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} diff --git a/src/walk.rs b/src/walk.rs new file mode 100644 index 0000000..6d68db2 --- /dev/null +++ b/src/walk.rs @@ -0,0 +1,532 @@ +use std::{ + collections::{BTreeMap, HashSet}, + fmt, fs, io, mem, + path::{Component, Path, PathBuf}, + sync::Arc, +}; + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use walkdir::WalkDir; + +use crate::{ + builder::MultiGlobOptions, + util::{device_num, is_glob_like}, + DirEntry, GlobError, +}; + +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +enum WalkNodeType { + #[default] + Path, + Glob, + Walk, +} + +#[derive(Default, Clone)] +struct WalkPlanNode { + node_type: WalkNodeType, + is_terminal: bool, + patterns: BTreeMap, +} + +impl WalkPlanNode { + pub fn build(patterns: &[impl AsRef]) -> Self { + let mut root = Self::default(); + for pattern in patterns { + let components: Vec<_> = Path::new(pattern.as_ref()).components().collect(); + let mut components = &components[..]; + let mut parts = Vec::new(); + let mut prefix = PathBuf::new(); + if 
components.len() >= 2 + && matches!(components[0], Component::Prefix(_)) + && matches!(components[1], Component::RootDir) + { + prefix.push(components[0]); + prefix.push(Component::RootDir); + parts.push(prefix.as_os_str().to_str().unwrap()); + components = &components[2..]; + } + for component in components { + parts.push(component.as_os_str().to_str().unwrap()); + } + root.insert(&parts); + } + // root.optimize(); + root + } + + pub fn terminal() -> Self { + Self { is_terminal: true, ..Self::default() } + } + + pub fn insert(&mut self, parts: &[&str]) { + let Some((&part, tail)) = parts.split_first() else { + self.is_terminal = true; + return; + }; + let make_path = || parts.iter().collect::().to_str().unwrap().to_owned(); + if self.node_type == WalkNodeType::Walk { + self.patterns.insert(make_path(), Self::terminal()); + return; + } + let part = part.to_owned(); + if part.contains("**") { + self.node_type = WalkNodeType::Walk; + let mut patterns = Vec::new(); + self.collect(PathBuf::new(), &mut patterns); + assert!(self.patterns.is_empty()); + for pattern in patterns { + self.patterns.insert(pattern, Self::terminal()); + } + self.patterns.insert(make_path(), Self::terminal()); + } else if is_glob_like(&part) { + self.node_type = WalkNodeType::Glob; + self.patterns.entry(part).or_default().insert(tail); + } else { + self.patterns.entry(part).or_default().insert(tail); + } + } + + pub fn collect(&mut self, path: PathBuf, out: &mut Vec) { + for (k, mut v) in mem::take(&mut self.patterns) { + let path = path.join(k); + if v.is_terminal { + out.push(path.to_str().unwrap().to_owned()); + } + v.collect(path, out); + } + } + + // pub fn optimize(&mut self) { + // // squash pure-path component trees into pure-path nodes with multi-part paths + // // note: this code would only make sense if we always resolved all symlinks for path components + // + // for v in self.patterns.values_mut() { + // v.optimize(); + // } + // if self.node_type != WalkNodeType::Path { + // 
return; + // } + // let mut patterns = BTreeMap::new(); + // for (k, mut v) in mem::take(&mut self.patterns) { + // if v.node_type == WalkNodeType::Path { + // if v.is_terminal { + // patterns.insert(k.clone(), Self::terminal()); + // } + // for (pk, pv) in mem::take(&mut v.patterns) { + // patterns.insert(Path::new(&k).join(&pk).to_str().unwrap().to_owned(), pv); + // } + // } else { + // patterns.insert(k, v); + // } + // } + // self.patterns = patterns; + // } +} + +impl fmt::Debug for WalkPlanNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let name = if self.patterns.is_empty() && self.is_terminal { + "Terminal".into() + } else { + let t = if self.is_terminal { "[T]" } else { "" }; + format!("{:?}{t}", self.node_type) + }; + let mut s = f.debug_struct(&name); + for (k, v) in &self.patterns { + s.field(k, &v); + } + s.finish() + } +} + +#[derive(Clone)] +enum WalkNodeMatcher { + Path { paths: Vec }, + Walk { globset: GlobSet, recursive: bool }, +} + +#[derive(Clone)] +struct WalkPlanNodeCompiled { + matcher: WalkNodeMatcher, + is_terminal: bool, + destinations: Vec, +} + +impl WalkPlanNodeCompiled { + pub fn new(node: &WalkPlanNode, case_insensitive: bool, errors: &mut Vec) -> Self { + let mut destinations = Vec::new(); + let matcher = if node.node_type == WalkNodeType::Path { + destinations.extend(node.patterns.values().cloned()); + WalkNodeMatcher::Path { paths: node.patterns.keys().cloned().collect() } + } else { + let mut globset = GlobSetBuilder::new(); + for (k, v) in &node.patterns { + let glob = match GlobBuilder::new(k).case_insensitive(case_insensitive).build() { + Ok(glob) => glob, + Err(err) => { + errors.push(err); + continue; + } + }; + globset.add(glob); + destinations.push(v.clone()); + } + let globset = match globset.build() { + Ok(globset) => globset, + Err(err) => { + errors.push(err); + destinations.clear(); + GlobSet::empty() + } + }; + let recursive = node.node_type == WalkNodeType::Walk; + WalkNodeMatcher::Walk { globset, 
recursive }
+        };
+        let destinations =
+            destinations.iter().map(|d| Self::new(d, case_insensitive, errors)).collect();
+        Self { matcher, is_terminal: node.is_terminal, destinations }
+    }
+}
+
+// Compact `Debug` output for compiled plan nodes. The struct name encodes the node kind:
+// `Terminal` for a terminal node without destinations, otherwise `Path` or `Glob`, suffixed
+// with `[T]` when the node is also terminal. Empty `paths` / `destinations` are omitted and
+// the globset is always rendered as the placeholder `".."`.
+impl fmt::Debug for WalkPlanNodeCompiled {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Derive the displayed struct name from the matcher kind and the terminal flag.
+        let name = if self.destinations.is_empty() && self.is_terminal {
+            "Terminal".into()
+        } else {
+            let t = if self.is_terminal { "[T]" } else { "" };
+            let n =
+                if matches!(self.matcher, WalkNodeMatcher::Path { .. }) { "Path" } else { "Glob" };
+            format!("{n}{t}")
+        };
+        let mut s = f.debug_struct(&name);
+        match &self.matcher {
+            WalkNodeMatcher::Path { paths } => {
+                if !paths.is_empty() {
+                    s.field("paths", &paths);
+                }
+            }
+            WalkNodeMatcher::Walk { recursive, .. } => {
+                // The globset itself is not printed, only a ".." placeholder.
+                s.field("globset", &"..").field("recursive", recursive);
+            }
+        }
+        if !self.destinations.is_empty() {
+            s.field("destinations", &self.destinations);
+        }
+        s.finish()
+    }
+}
+
+/// Iteration state of a `NodeWalker`, one variant per kind of `WalkNodeMatcher`.
+enum NodeWalkerState {
+    /// Probe a fixed list of paths one by one; `index` is the position of the next path to try.
+    Path { paths: Vec, index: usize },
+    /// Traverse the base with walkdir and match the yielded entries against `globset`.
+    /// `base_checked` records whether the existence of the base has already been verified;
+    /// `recursive` is carried over from the matcher.
+    Walk { globset: GlobSet, walker: walkdir::IntoIter, base_checked: bool, recursive: bool },
+}
+
+/// Shared callback that is applied to every freshly created `WalkDir` before it is turned
+/// into an iterator; the same callback is handed down to all child walkers.
+type WalkDirFn = Arc WalkDir + Send + Sync + 'static>;
+
+/// Result of one successful `NodeWalker` step.
+#[derive(Default)]
+struct NodeWalkerOutput {
+    /// Entry to hand back to the caller; set when a matched destination is terminal.
+    terminal: Option,
+    /// Walkers for the matched destinations that have further destinations of their own.
+    nodes: Vec,
+}
+
+/// Walks a single compiled plan node rooted at `base`, yielding matched entries together with
+/// the walkers needed to continue matching below them.
+struct NodeWalker {
+    /// Path this node is evaluated against.
+    base: PathBuf,
+    state: NodeWalkerState,
+    /// Child plan nodes; a match index `i` refers to `destinations[i]`.
+    destinations: Vec,
+    /// Scratch buffer holding the destination indices matched by the current entry; it is
+    /// cleared at the start of every iteration.
+    index_buf: Vec,
+    walkdir_fn: WalkDirFn,
+    opts: MultiGlobOptions,
+    /// When set, `base` itself is yielded once before anything else.
+    yield_self: bool,
+    /// Device number a directory must be on for the walker to descend into it; `None`
+    /// disables the check.
+    root_device: Option,
+}
+
+impl NodeWalker {
+    /// Creates a walker for `node` rooted at `base`.
+    ///
+    /// `starting_node` distinguishes a top-level walker from one spawned for a matched
+    /// directory: only a starting walker asks walkdir to follow a symlinked root, verifies
+    /// that `base` exists before walking, and yields `base` itself when the node is terminal.
+    pub fn new(
+        node: WalkPlanNodeCompiled,
+        base: PathBuf,
+        walkdir_fn: WalkDirFn,
+        opts: MultiGlobOptions,
+        starting_node: bool,
+        root_device: Option,
+    ) -> Self {
+        let state = match node.matcher {
+            WalkNodeMatcher::Path { paths } => {
+                // Resolve the literal paths against the base once, up front.
+                let paths = paths.iter().map(|p| base.join(p)).collect();
+                NodeWalkerState::Path { paths, index: 0 }
+            }
+            WalkNodeMatcher::Walk { globset, recursive } => {
+                // A non-recursive walk is limited to depth 1; a recursive one uses the
+                // configured maximum depth.
+                let max_depth = if recursive { opts.max_depth } else { 1 };
+                let walker = walkdir_fn(WalkDir::new(&base))
+                    .max_depth(max_depth)
+                    .follow_root_links(starting_node)
+                    .into_iter();
+                // Only a starting walker still has to verify that its base exists.
+                NodeWalkerState::Walk { globset, walker, base_checked: !starting_node, recursive }
+            }
+        };
+        Self {
+            base,
+            state,
+            destinations: node.destinations,
+            index_buf: Vec::new(),
+            walkdir_fn,
+            opts,
+            yield_self: starting_node && node.is_terminal,
+            root_device,
+        }
+    }
+}
+
+// Each successful step reports one filesystem entry that matched at least one destination;
+// entries that match nothing are skipped without being reported.
+impl Iterator for NodeWalker {
+    type Item = io::Result;
+
+    /// Advances the walker to the next matching entry.
+    ///
+    /// Returns `None` once the path list or the walkdir traversal is exhausted (or when the
+    /// base of a not-yet-checked walk does not exist), `Some(Err(_))` for an error reported by
+    /// walkdir, and `Some(Ok(_))` carrying the terminal entry and/or child walkers otherwise.
+    fn next(&mut self) -> Option {
+        loop {
+            let mut entry = None;
+            self.index_buf.clear();
+
+            match &mut self.state {
+                // This guarded arm is tried first regardless of the state: yield the base
+                // itself exactly once. Metadata failures are not reported; the base is then
+                // simply not yielded and iteration continues with the regular state.
+                _ if self.yield_self => {
+                    self.yield_self = false;
+                    let Ok(meta) = fs::metadata(&self.base) else {
+                        continue;
+                    };
+                    let Ok(follow) = fs::symlink_metadata(&self.base).map(|m| m.is_symlink())
+                    else {
+                        continue;
+                    };
+                    let entry = DirEntry::from_meta(self.base.clone(), meta, follow);
+                    return Some(Ok(NodeWalkerOutput {
+                        terminal: Some(entry),
+                        ..Default::default()
+                    }));
+                }
+                NodeWalkerState::Path { paths, index } => {
+                    if *index >= paths.len() {
+                        return None;
+                    }
+                    let i = *index;
+                    *index += 1;
+                    let path = paths[i].clone();
+                    // Paths whose metadata cannot be read are skipped silently.
+                    let Ok(mut meta) = fs::symlink_metadata(&path) else {
+                        continue;
+                    };
+                    let follow = meta.is_symlink() && self.opts.follow_links;
+                    if follow {
+                        // Replace the link's own metadata with the metadata of its target;
+                        // links that cannot be resolved are skipped.
+                        if let Ok(m) = fs::metadata(&path) {
+                            meta = m;
+                        } else {
+                            continue;
+                        }
+                    }
+                    entry = Some(DirEntry::from_meta(path, meta, follow));
+                    // The i-th path corresponds to the i-th destination.
+                    self.index_buf.push(i);
+                }
+                NodeWalkerState::Walk { walker, globset, base_checked, recursive } => {
+                    if !*base_checked {
+                        // if we don't do this before kicking off walkdir iteration, it will yield an error
+                        if !self.base.try_exists().unwrap_or(false) {
+                            return None;
+                        }
+                        *base_checked = true;
+                    }
+                    // `?` ends the iteration once walkdir has no more entries.
+                    let walk_entry = match walker.next()? {
+                        Ok(v) => v,
+                        Err(err) => return Some(Err(err.into())),
+                    };
+
+                    if walk_entry.path() != self.base || *recursive {
+                        // we check base equality because if we kick off a glob like base/*, base will match *
+                        if let Ok(path) = walk_entry.path().strip_prefix(&self.base) {
+                            // Globs are matched against the path relative to the base.
+                            globset.matches_into(path, &mut self.index_buf);
+                            if !self.index_buf.is_empty() {
+                                entry = Some(DirEntry::from_walk(walk_entry));
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Nothing matched in this iteration: move on to the next candidate.
+            let Some(entry) = entry else { continue };
+            let mut out = NodeWalkerOutput::default();
+
+            let path = entry.path().to_path_buf();
+            let is_dir = entry.file_type().is_dir(); // will account for follow_links
+
+            // Wrapped in an `Option` so the entry can be moved into the output at most once.
+            let mut entry = Some(entry);
+            for &i in &self.index_buf {
+                let dst = &self.destinations[i];
+                if dst.is_terminal && out.terminal.is_none() {
+                    out.terminal = entry.take();
+                }
+                if !dst.destinations.is_empty() && is_dir {
+                    // With a root device set, do not descend into directories that are on a
+                    // different device or whose device number cannot be determined.
+                    if self.root_device.is_some() && device_num(&path).ok() != self.root_device {
+                        continue;
+                    }
+                    out.nodes.push(NodeWalker::new(
+                        dst.clone(),
+                        path.clone(),
+                        self.walkdir_fn.clone(),
+                        self.opts,
+                        false,
+                        self.root_device,
+                    ));
+                }
+            }
+            // Only report entries that produced a terminal and/or at least one child walker.
+            if out.terminal.is_some() || !out.nodes.is_empty() {
+                return Some(Ok(out));
+            }
+        }
+    }
+}
+
+/// An iterator for traversing multiple globs from a given base path.
+///
+/// A value of this type must be constructed with the [`MultiGlobBuilder`] type, which
+/// allows configuring various options related to walking and glob matching.
+///
+/// [`MultiGlobBuilder`]: struct.MultiGlobBuilder.html
+pub struct MultiGlobWalker {
+    // Path whose device number is looked up when `opts.same_file_system` is set.
+    root: PathBuf,
+    opts: MultiGlobOptions,
+    // Pending walkers. The walker on top of the stack (the last element) is always advanced
+    // first, and child walkers produced by it are pushed on top of it.
+    stack: Vec,
+    // Lazily resolved device number of `root`: `None` means not resolved yet, `Some(None)`
+    // means the lookup failed (iteration has ended), `Some(Some(_))` holds the device number.
+    root_device: Option>,
+    // Paths that have already been yielded; used to suppress duplicates.
+    returned: HashSet,
+}
+
+impl MultiGlobWalker {
+    /// Creates a walker with no patterns registered yet; use `add` to register them.
+    pub(crate) fn new(root: PathBuf, opts: MultiGlobOptions) -> Self {
+        Self { root, opts, stack: Vec::new(), root_device: None, returned: HashSet::new() }
+    }
+
+    /// Builds and compiles a walk plan for `patterns` and pushes a starting walker rooted at
+    /// `base` onto the stack.
+    ///
+    /// `errors` is handed to the plan compilation step; presumably glob compilation failures
+    /// are appended to it rather than aborting the whole call (confirm against
+    /// `WalkPlanNodeCompiled::new`).
+    pub(crate) fn add(
+        &mut self,
+        base: PathBuf,
+        patterns: Vec,
+        errors: &mut Vec,
+    ) {
+        let plan = WalkPlanNode::build(&patterns);
+        let node = WalkPlanNodeCompiled::new(&plan, self.opts.case_insensitive, errors);
+        // Copy the options so the closure does not borrow `self`.
+        let opts = self.opts;
+        let walkdir_fn = Arc::new(move |walkdir| opts.configure_walkdir(walkdir));
+        // The root device is left unset here; `next` fills it in for all stacked walkers on
+        // its first call when `same_file_system` is enabled.
+        let walker = NodeWalker::new(node, base, walkdir_fn, self.opts, true, None);
+        self.stack.push(walker);
+    }
+
+    /// Returns the walker with the order of its stacked walkers reversed.
+    pub(crate) fn rev(self) -> Self {
+        Self { stack: self.stack.into_iter().rev().collect(), ..self }
+    }
+}
+
+// Drives the stacked node walkers and yields every terminal entry at most once.
+impl Iterator for MultiGlobWalker {
+    type Item = io::Result;
+
+    /// Yields the next matched entry, an error from an underlying walker or from
+    /// canonicalization, or `None` when all walkers are exhausted.
+    fn next(&mut self) -> Option {
+        if self.opts.same_file_system {
+            match self.root_device {
+                // First call: resolve the device of the root and hand it to every walker
+                // that is already on the stack.
+                None => match device_num(&self.root) {
+                    Ok(dn) => {
+                        for walker in &mut self.stack {
+                            walker.root_device = Some(dn);
+                        }
+                        self.root_device = Some(Some(dn))
+                    }
+                    Err(err) => {
+                        // Report the error once; later calls end the iteration (see below).
+                        self.root_device = Some(None);
+                        return Some(Err(err));
+                    }
+                },
+                // The device lookup failed on an earlier call: nothing more to yield.
+                Some(None) => return None,
+                _ => (),
+            }
+        }
+        while !self.stack.is_empty() {
+            match self.stack.last_mut().unwrap().next() {
+                // The top walker is exhausted: drop it and continue with the one below.
+                None => _ = self.stack.pop(),
+                Some(Err(err)) => return Some(Err(err)),
+                Some(Ok(mut res)) => {
+                    // Child walkers go on top of the stack, so they run before their parent
+                    // is advanced again.
+                    self.stack.append(&mut res.nodes);
+                    if let Some(mut terminal) = res.terminal {
+                        // The deduplication key is the canonical path when canonicalization
+                        // is enabled and the entry's own path otherwise.
+                        let path = if self.opts.canonicalize {
+                            terminal = match terminal.into_canonicalized() {
+                                Err(err) => return Some(Err(err)),
+                                Ok(entry) => entry,
+                            };
+                            // NOTE(review): presumably always `Some` after a successful
+                            // `into_canonicalized()` - confirm against `DirEntry`.
+                            terminal.canonicalized().unwrap()
+                        } else {
+                            terminal.path().to_owned()
+                        };
+                        // `insert` returns false for a path that was already yielded.
+                        if !self.returned.insert(path) {
+                            continue;
+                        }
+                        return Some(Ok(terminal));
+                    }
+                }
+            };
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{WalkPlanNode, WalkPlanNodeCompiled};
+
+    /// Snapshot test of plan building and compilation for POSIX-style patterns: plain relative
+    /// paths, `..` components, recursive `**` globs and absolute paths.
+    #[test]
+    #[cfg(not(windows))]
+    fn test_walk_plan_node_posix() {
+        let node = WalkPlanNode::build(&[
+            "foo/bar",
+            "x/y",
+            "foo/bar/../z",
+            "../../a",
+            "../x/y",
+            "../x/**/y",
+            "../x/**/z/*",
+            "../x/**",
+            "/var/folders/",
+            "/var/folders/1/2",
+            "/var/folders/*.doc",
+            "/home/user",
+        ]);
+        let mut errors = Vec::new();
+        let cnode = WalkPlanNodeCompiled::new(&node, false, &mut errors);
+        // None of the patterns above is expected to produce a compilation error.
+        assert!(errors.is_empty());
+        // Two snapshots are taken, told apart by suffix: the debug output of the plan as
+        // built ("node") and of the compiled plan ("cnode").
+        // NOTE(review): the stored snapshot names presumably depend on this function's name
+        // and on the order of the assertions - keep both stable.
+        let mut settings = insta::Settings::clone_current();
+        settings.set_snapshot_path("tests/snapshots");
+        settings.set_snapshot_suffix("node");
+        settings.bind(|| insta::assert_snapshot!(&format!("{node:#?}")));
+        settings.set_snapshot_suffix("cnode");
+        settings.bind(|| insta::assert_snapshot!(&format!("{cnode:#?}")));
+    }
+
+    /// Windows counterpart of the POSIX snapshot test: mixes `/` and `\` separators and adds
+    /// drive-prefixed and UNC patterns.
+    #[test]
+    #[cfg(windows)]
+    fn test_walk_plan_node_win() {
+        let node = WalkPlanNode::build(&[
+            r"foo/bar",
+            r"x/y",
+            r"foo\bar/../z",
+            r"../../a",
+            r"..\x/y",
+            r"../x/**/y",
+            r"../x/**/z/*",
+            r"../x/**",
+            r"\var/folders/",
+            r"/var/folders/1/2",
+            r"C:\var/folders/*.doc",
+            r"C:\var/folders/secret\*.txt",
+            r"\\unc\share\foo\*\*",
+            r"\\unc\share\foo\[ab].txt",
+        ]);
+        let mut errors = Vec::new();
+        let cnode = WalkPlanNodeCompiled::new(&node, false, &mut errors);
+        // None of the patterns above is expected to produce a compilation error.
+        assert!(errors.is_empty());
+        // Same two-snapshot scheme as in the POSIX test ("node" and "cnode" suffixes).
+        let mut settings = insta::Settings::clone_current();
+        settings.set_snapshot_path("tests/snapshots");
+        settings.set_snapshot_suffix("node");
+        settings.bind(|| insta::assert_snapshot!(&format!("{node:#?}")));
+        settings.set_snapshot_suffix("cnode");
+        settings.bind(|| insta::assert_snapshot!(&format!("{cnode:#?}")));
+    }
+}