Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
7eaab20
Initial sort-of-working version
aldanor May 17, 2025
80e158a
a bit of a cleanup
aldanor May 17, 2025
a9ee2a8
clean up walker a bit
aldanor May 17, 2025
5497dec
dir entries and follow wip
aldanor May 19, 2025
2128895
tests and fixes
aldanor May 19, 2025
e22cd55
remove follow logic for nodes
aldanor May 19, 2025
a8a2a02
tests
aldanor May 19, 2025
8622980
rel tests
aldanor May 19, 2025
1c943a4
clippy
aldanor May 19, 2025
4b01683
add max_depth for recursive globs + test
aldanor May 19, 2025
2187bb2
cleanups and tests
aldanor May 19, 2025
57fc140
walknodecompiled debug snapshot test
aldanor May 20, 2025
e5b6c38
more fixes, more tests
aldanor May 20, 2025
3a12585
more walk tests
aldanor May 20, 2025
0c6f94c
move insta snaps
aldanor May 20, 2025
4dc09b6
split walk tests
aldanor May 20, 2025
24ccc3a
rel tests and fixes
aldanor May 20, 2025
95e7c2b
check base dir before glob
aldanor May 20, 2025
d1e7bc4
warnings
aldanor May 20, 2025
56d64d3
remove unneeded special case
aldanor May 20, 2025
033febf
minor docs and tests
aldanor May 20, 2025
a2c17a3
inline unneeded macro
aldanor May 20, 2025
7deaffa
follow root links for all starting nodes
aldanor May 20, 2025
6f1bd5d
max depth builder fix
aldanor May 20, 2025
051d796
docstrings cleanup
aldanor May 20, 2025
640a8e6
test loops and errors
aldanor May 20, 2025
d45d109
add basic ci
aldanor May 20, 2025
211a9c1
set msrv to 1.81
aldanor May 20, 2025
c87d808
run clippy in ci
aldanor May 20, 2025
e0c1904
use Path::try_exists()
aldanor May 20, 2025
ba38b19
remove tmp main bin
aldanor May 20, 2025
b8df9dd
some clippy
aldanor May 21, 2025
4ca125d
pin walkdir to 2.5
aldanor May 21, 2025
9849acb
minor cleanups
aldanor May 21, 2025
6f156d8
dep versions
aldanor May 21, 2025
2616674
dewindowsify tests, update snaps
aldanor May 21, 2025
004e082
another win attempt
aldanor May 21, 2025
5240b55
derive Debug for Trie
aldanor May 23, 2025
97bb177
more tests + win fixes
aldanor May 23, 2025
e1d9ff6
clippy fixups
aldanor May 27, 2025
83de681
cleanup
aldanor May 27, 2025
5150c3e
same file system support
aldanor May 27, 2025
2ec05b5
clippy
aldanor May 27, 2025
c3cbb6e
fix not including base dir for non-rec globs
aldanor May 27, 2025
00eae1e
implement case_insensitive
aldanor May 27, 2025
9961f61
clippy
aldanor May 27, 2025
f1b412e
test file symlinks
aldanor May 27, 2025
9d18537
clippy
aldanor May 27, 2025
138ff2c
return glob errors on construction
aldanor May 27, 2025
184a83b
add tests for invalid globs
aldanor May 27, 2025
8aa9929
use pretty_assertions
aldanor May 27, 2025
fa640c8
add a bigger walk test
aldanor May 27, 2025
7c93efb
add `canonicalize` option and dedup
aldanor May 27, 2025
86d69e8
remove debug prints
aldanor May 27, 2025
dac71b6
add canonicalize tests
aldanor May 27, 2025
cab5d96
minor doc fixups
aldanor May 27, 2025
da149a7
remove all debug statements
aldanor May 29, 2025
d2667ea
add windows tests for cluster_globs
aldanor Jun 4, 2025
bbd43be
remove recursion from trie group collection
aldanor Jun 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Continuous integration workflow.
name: ci
# Triggers: pull requests targeting `main`, pushes to `main`, and a scheduled run.
on:
pull_request:
branches:
- main
push:
branches:
- main
schedule:
# Cron fields are minute, hour, day-of-month, month, day-of-week: every day at 01:00.
- cron: "00 01 * * *"

# The workflow token is only granted read access to repository contents.
permissions:
contents: read

jobs:
# Build/doc/test matrix: one entry per (build label, runner OS, Rust toolchain).
test:
name: test
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
# The two `pinned*` entries use Rust 1.81.0; all other entries use a release channel.
- build: pinned
os: ubuntu-latest
rust: 1.81.0
- build: pinned-win
os: windows-latest
rust: 1.81.0
- build: stable
os: ubuntu-latest
rust: stable
- build: beta
os: ubuntu-latest
rust: beta
- build: nightly
os: ubuntu-latest
rust: nightly
- build: macos
os: macos-latest
rust: stable
- build: win-msvc
os: windows-latest
rust: stable
- build: win-gnu
os: windows-latest
rust: stable-x86_64-gnu
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ matrix.rust }}
# Every matrix entry builds the crate.
- run: cargo build --verbose
# Docs and tests are skipped for builds whose label starts with `pinned-`.
# NOTE(review): that prefix matches `pinned-win` but not the plain `pinned` label, so
# docs and tests still run for `pinned` - confirm this is intended.
- if: startsWith(matrix.build, 'pinned-') == false
run: cargo doc --verbose
- if: startsWith(matrix.build, 'pinned-') == false
run: cargo test --verbose
# Nightly only: regenerate the lockfile with `-Z minimal-versions`, then build and test again.
- if: matrix.build == 'nightly'
run: |
set -x
cargo generate-lockfile -Z minimal-versions
cargo build --verbose
cargo test --verbose

# Formatting gate: runs `cargo fmt` in `--check` mode on the stable toolchain.
rustfmt:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
# The `rustfmt` component is requested explicitly alongside the toolchain.
components: rustfmt
- name: Check formatting
run: |
cargo fmt -- --check

# Lint gate: runs clippy (test targets included) on stable with warnings denied.
clippy:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
# The `clippy` component is requested explicitly alongside the toolchain.
components: clippy
- name: Check linting
run: |
cargo clippy --tests -- -D warnings
28 changes: 28 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[package]
name = "multiglob"
version = "0.1.0"
edition = "2021"
authors = ["Ivan Smirnov <aldanor@users.noreply.github.com>"]
# SPDX license expression: the operator is the upper-case keyword `OR`.
license = "MIT OR Apache-2.0"
repository = "https://github.com/aldanor/multiglob"
homepage = "https://github.com/aldanor/multiglob"
documentation = "https://docs.rs/multiglob"
categories = ["filesystem"]
keywords = ["glob", "walk", "pattern", "directory", "recursive"]
readme = "README.md"
resolver = "2"
# Minimum supported Rust version; the pinned CI builds use the same release.
rust-version = "1.81"

[dependencies]
globset = "0.4.1"
walkdir = "2.4"

[target.'cfg(windows)'.dependencies.winapi-util]
version = "0.1"

[dev-dependencies]
current_dir = "0.1"
insta = "1.43"
pretty_assertions = "1.4"
rstest = "0.25"
tempfile = "3"
2 changes: 2 additions & 0 deletions rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Formatter settings for this crate.
# Lines may be up to 99 columns wide.
max_width = 99
# NOTE(review): per the rustfmt configuration docs, "Max" sizes the width heuristics to
# `max_width` - confirm against the rustfmt version in use.
use_small_heuristics = "Max"
217 changes: 217 additions & 0 deletions src/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
use std::path::{Path, PathBuf};

use globset::Glob;
use walkdir::WalkDir;

use crate::{cluster::cluster_globs, walk::MultiGlobWalker, GlobError};

/// Bundle of every walkdir/globset setting, kept in one `Copy` value so it can be passed around.
#[derive(Clone, Copy, Debug)]
pub struct MultiGlobOptions {
    /// Follow symbolic links while walking.
    pub follow_links: bool,
    /// Maximum depth of recursive globs.
    pub max_depth: usize,
    /// Maximum number of simultaneously open file descriptors used by the walkers.
    pub max_open: usize,
    /// Do not cross file system boundaries while walking.
    pub same_file_system: bool,
    /// Match globs case insensitively.
    pub case_insensitive: bool,
    /// Canonicalize yielded paths (and deduplicate by the canonicalized path).
    pub canonicalize: bool,
}

impl Default for MultiGlobOptions {
    /// Defaults: no depth limit, at most 10 open descriptors, every toggle switched off.
    fn default() -> Self {
        // Traversal limits.
        let (max_depth, max_open) = (usize::MAX, 10);
        // All boolean toggles start out disabled.
        let [follow_links, same_file_system, case_insensitive, canonicalize] = [false; 4];
        Self { follow_links, max_depth, max_open, same_file_system, case_insensitive, canonicalize }
    }
}

impl MultiGlobOptions {
    /// Apply the walkdir-related options to `walkdir` and return the configured walker.
    ///
    /// Entries are sorted by file name; link following, the open-descriptor limit and the
    /// same-file-system restriction are taken from `self`. `max_depth` is not applied here.
    pub fn configure_walkdir(&self, walkdir: WalkDir) -> WalkDir {
        let Self { follow_links, max_open, same_file_system, .. } = *self;
        let sorted = walkdir.sort_by_file_name();
        let linked = sorted.follow_links(follow_links);
        linked.max_open(max_open).same_file_system(same_file_system)
    }
}

/// A builder to create an iterator over multiple globs from a given base path.
///
/// Created with `new`, adjusted through the chained option setters, and turned into a walker
/// with `build` or `build_skip_invalid`.
#[derive(Clone, Debug)]
pub struct MultiGlobBuilder {
// Base path as passed to `new`; glob group bases are joined onto it when building.
base: PathBuf,
// Glob patterns as passed to `new`, stored as owned strings.
patterns: Vec<String>,
// Walk and match options; start as `MultiGlobOptions::default()` and are changed by the setters.
opts: MultiGlobOptions,
}

impl MultiGlobBuilder {
/// Create a multiglob walker builder for a base directory and a list of glob patterns.
///
/// When the resulting walker is iterated, the `base` directory is searched recursively for
/// paths matching `patterns`. All options start at their defaults.
pub fn new<B, P, S>(base: B, patterns: P) -> Self
where
    B: AsRef<Path>,
    P: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let base = base.as_ref().to_path_buf();
    let patterns = patterns.into_iter().map(|pattern| String::from(pattern.as_ref())).collect();
    Self { base, patterns, opts: MultiGlobOptions::default() }
}

/// Construct a multiglob walker, propagating any error that occurs while parsing globs.
///
/// If several patterns are invalid, only the first reported error is returned.
pub fn build(&self) -> Result<MultiGlobWalker, GlobError> {
    let (walker, errors) = self.build_skip_invalid();
    match errors.into_iter().next() {
        Some(first) => Err(first),
        None => Ok(walker),
    }
}

/// Construct a multiglob walker, skipping every glob pattern that turns out to be invalid.
///
/// The second element of the returned tuple lists all glob errors that were encountered.
///
/// Note: patterns are validated up front so that errors can mention the pattern as it was
/// supplied; errors reported later, while pattern groups are added to the walker, may not
/// refer to the original patterns.
pub fn build_skip_invalid(&self) -> (MultiGlobWalker, Vec<GlobError>) {
    let mut errors = Vec::new();

    // Validate early to try and retain original glob patterns in reported errors.
    let mut valid = Vec::with_capacity(self.patterns.len());
    for pattern in &self.patterns {
        match Glob::new(pattern) {
            Ok(_) => valid.push(pattern.clone()),
            Err(err) => errors.push(err),
        }
    }

    // An empty base path is replaced by ".".
    let root = if self.base == PathBuf::new() { PathBuf::from(".") } else { self.base.clone() };

    let mut walker = MultiGlobWalker::new(self.base.clone(), self.opts);
    for (prefix, group) in cluster_globs(&valid) {
        let joined = root.join(prefix);
        // `join` can yield a path that compares equal to the root yet is spelled differently
        // (presumably a trailing separator when the prefix is empty); keep the root's spelling.
        let start = if joined == root { root.clone() } else { joined };
        walker.add(start, group, &mut errors);
    }
    (walker.rev(), errors)
}

/// Toggle whether the globs should be matched case insensitively or not.
///
/// This is disabled by default.
pub fn case_insensitive(mut self, yes: bool) -> Self {
self.opts.case_insensitive = yes;
self
}

/// Set the maximum depth of all recursive globs (those containing `**`).
///
/// The smallest depth is `0` and always corresponds to the path given
/// to the `new` function on this type. Its direct descendents have depth
/// `1`, and their descendents have depth `2`, and so on.
///
/// This will not simply filter the entries of the iterator, but
/// it will actually avoid descending into directories when the depth is
/// exceeded.
///
/// Note that the depth is counted not from the base directory, but from a point
/// where a recursive pattern is encountered. For example, if maximum depth is 2
/// and patterns are `../a/**` and `b/**`, then the deepest entries will look
/// like `../a/x/y` and `b/x/y`.
///
/// By default, there's no max depth limit.
pub fn max_depth(mut self, depth: usize) -> Self {
self.opts.max_depth = depth;
self
}

/// Follow symbolic links. By default, this is disabled.
///
/// When `yes` is `true`, symbolic links are followed as if they were
/// normal directories and files. If a symbolic link is broken or is
/// involved in a loop, an error is yielded.
///
/// When enabled, the yielded [`DirEntry`] values represent the target of
/// the link while the path corresponds to the link. See the [`DirEntry`]
/// type for more details.
///
/// Note, this only affects parts of globs starting from the first glob-like
/// component. For example, in a pattern `a/b/*/c/**` this will only affect
/// the `*/c/**` part of the pattern.
///
/// [`DirEntry`]: struct.DirEntry.html
pub fn follow_links(mut self, yes: bool) -> Self {
self.opts.follow_links = yes;
self
}

/// Set the maximum number of simultaneously open file descriptors used
/// by glob walker iterators.
///
/// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
/// to `1` automatically. If this is not set, then it defaults to some
/// reasonably low number.
///
/// This setting has no impact on the results yielded by the iterator
/// (even when `n` is `1`). Instead, this setting represents a trade off
/// between scarce resources (file descriptors) and memory. Namely, when
/// the maximum number of file descriptors is reached and a new directory
/// needs to be opened to continue iteration, then a previous directory
/// handle is closed and has its unyielded entries stored in memory. In
/// practice, this is a satisfying trade off because it scales with respect
/// to the *depth* of your file tree. Therefore, low values (even `1`) are
/// acceptable.
///
/// Note that this value does not impact the number of system calls made by
/// an exhausted iterator.
///
/// # Platform behavior
///
/// On Windows, if `follow_links` is enabled, then this limit is not
/// respected. In particular, the maximum number of file descriptors opened
/// is proportional to the depth of the directory tree traversed.
pub fn max_open(mut self, mut n: usize) -> Self {
if n == 0 {
n = 1;
}
self.opts.max_open = n;
self
}

/// Do not cross file system boundaries.
///
/// When this option is enabled, directory traversal will not descend into
/// directories that are on a different file system from the base path.
pub fn same_file_system(mut self, yes: bool) -> Self {
self.opts.same_file_system = yes;
self
}

/// Canonicalize paths via [`std::fs::canonicalize`] (and deduplicate by canonicalized paths).
///
/// Without this option, the walker will not be able to tell apart "a/b" and "a/../a/b"
/// and will always return both (however, it will still deduplicate non-canonicalized paths).
///
/// Notes:
/// - This is not free resource-wise as it retrieves entry metadata and resolves links.
/// - If this option is enabled and entry path cannot be canonicalized, error is returned.
/// - Resulting [`DirEntry`] objects will contain canonicalized paths along with resolved metadata.
///
/// [`std::fs::canonicalize`]: https://doc.rust-lang.org/std/fs/fn.canonicalize.html
/// [`DirEntry`]: struct.DirEntry.html
pub fn canonicalize(mut self) -> Self {
self.opts.canonicalize = true;
self
}
}
Loading