From 68c816e788736cb40265166a40769f1d433a778b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Dec 2022 11:24:30 +0100 Subject: [PATCH 1/3] make `hyperscan` a feature (default on) to allow it to be disabled. That way the library can be built on non-x64 platforms. --- Cargo.toml | 6 ++++- src/bin/noseyparker/cmd_rules.rs | 3 +++ src/matcher.rs | 40 ++++++++++++++++++-------------- src/rules_database.rs | 10 +++++++- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 89326c3e0..00fc41b80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,10 @@ path = "src/lib.rs" name = "noseyparker" path = "src/bin/noseyparker/main.rs" +[features] +default = ["hyperscan"] +hyperscan = ["dep:hyperscan"] + [dependencies] # anyhow = { version = "1.0", features = ["backtrace"] } # add backtraces to errors -- not sure how expensive this is anyhow = { version = "1.0" } @@ -34,7 +38,7 @@ git2 = { version = "0.15", features = ["vendored-libgit2", "vendored-openssl"] } libc = "0.2" libgit2-sys = "*" hex = "0.4" -hyperscan = { version = "0.3", features = ["full", "static"] } +hyperscan = { version = "0.3", features = ["full", "static"], optional = true } # hyperscan-sys = { version = "0.3", features = ["full", "static"] } include_dir = { version = "0.7", features = ["glob"] } indenter = "0.3" diff --git a/src/bin/noseyparker/cmd_rules.rs b/src/bin/noseyparker/cmd_rules.rs index fbb8022e4..a4cc7f35f 100644 --- a/src/bin/noseyparker/cmd_rules.rs +++ b/src/bin/noseyparker/cmd_rules.rs @@ -1,4 +1,5 @@ use anyhow::{Context, Result, bail}; +#[cfg(feature = "hyperscan")] use hyperscan::prelude::{pattern, BlockDatabase, Builder, Matching}; use tracing::{debug_span, error, error_span, info, warn}; @@ -45,6 +46,7 @@ fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs) Ok(()) } +#[cfg(feature = "hyperscan")] fn hs_compile_pattern(pat: &str) -> Result { let pattern = pattern! {pat}; let db: BlockDatabase = pattern.build()?; @@ -123,6 +125,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result { // Ok(_db) => {} // } + #[cfg(feature = "hyperscan")] match hs_compile_pattern(&rule.uncommented_pattern()) { Err(e) => { error!("Hyperscan: failed to compile pattern: {}", e); diff --git a/src/matcher.rs b/src/matcher.rs index e3cf31aaf..592d9538b 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -57,6 +57,7 @@ pub struct BlobMatch<'r, 'b> { /// If doing multi-threaded scanning, use a separate `Matcher` for each thread. pub struct Matcher<'a> { /// A scratch buffer for Hyperscan + #[cfg(feature = "hyperscan")] hs_scratch: hyperscan::Scratch, /// A scratch vector for raw matches from Hyperscan, to minimize allocation @@ -96,6 +97,7 @@ impl<'a> Matcher<'a> { global_stats: Option<&'a Mutex>, ) -> Result { Ok(Matcher { + #[cfg(feature = "hyperscan")] hs_scratch: rules_db.hsdb.alloc_scratch()?, raw_matches_scratch: Vec::with_capacity(16384), rules_db, @@ -106,25 +108,29 @@ impl<'a> Matcher<'a> { } #[inline] + #[cfg_attr(not(feature = "hyperscan"), allow(unused_variables))] fn scan_bytes_raw(&mut self, input: &[u8]) -> Result<()> { self.raw_matches_scratch.clear(); - let input_len: u64 = input.len().try_into().unwrap(); - self.rules_db - .hsdb - .scan(input, &self.hs_scratch, |id: u32, from: u64, to: u64, _flags: u32| { - // let start_idx = if from == hyperscan_sys::HS_OFFSET_PAST_HORIZON { 0 } else { from }; - // - // NOTE: `from` is only going to be meaningful here if we start compiling rules - // with the HS_SOM_LEFTMOST flag. But it doesn't seem to hurt to use the 0-value - // provided when that flag is not used. - let start_idx = std::cmp::min(from.try_into().unwrap(), input_len); - self.raw_matches_scratch.push(RawMatch { - rule_id: id.try_into().unwrap(), - start_idx, - end_idx: to.try_into().unwrap(), - }); - hyperscan::Matching::Continue - })?; + #[cfg(feature = "hyperscan")] + { + let input_len: u64 = input.len().try_into().unwrap(); + self.rules_db + .hsdb + .scan(input, &self.hs_scratch, |id: u32, from: u64, to: u64, _flags: u32| { + // let start_idx = if from == hyperscan_sys::HS_OFFSET_PAST_HORIZON { 0 } else { from }; + // + // NOTE: `from` is only going to be meaningful here if we start compiling rules + // with the HS_SOM_LEFTMOST flag. But it doesn't seem to hurt to use the 0-value + // provided when that flag is not used. + let start_idx = std::cmp::min(from.try_into().unwrap(), input_len); + self.raw_matches_scratch.push(RawMatch { + rule_id: id.try_into().unwrap(), + start_idx, + end_idx: to.try_into().unwrap(), + }); + hyperscan::Matching::Continue + })?; + } Ok(()) } diff --git a/src/rules_database.rs b/src/rules_database.rs index 3267bf477..094675d5b 100644 --- a/src/rules_database.rs +++ b/src/rules_database.rs @@ -1,4 +1,7 @@ -use anyhow::{bail, Context, Result}; +use anyhow::{bail, Result}; +#[cfg(feature = "hyperscan")] +use anyhow::{Context}; +#[cfg(feature = "hyperscan")] use hyperscan::prelude::{Builder, Pattern, Patterns}; use regex::bytes::Regex; use std::path::Path; @@ -11,6 +14,7 @@ pub struct RulesDatabase { // NOTE: pub(crate) here so that `Matcher` can access these pub(crate) rules: Rules, pub(crate) anchored_regexes: Vec, + #[cfg(feature = "hyperscan")] pub(crate) hsdb: hyperscan::BlockDatabase, } @@ -33,6 +37,7 @@ impl RulesDatabase { bail!("No rules to compile"); } + #[cfg(feature = "hyperscan")] let patterns = rules .rules .iter() @@ -43,6 +48,7 @@ impl RulesDatabase { .collect::>>()?; let t1 = Instant::now(); + #[cfg(feature = "hyperscan")] let hsdb = Patterns::build(&Patterns::from(patterns))?; let d1 = t1.elapsed().as_secs_f64(); @@ -57,6 +63,7 @@ impl RulesDatabase { debug!("Compiled {} rules: hyperscan {}s; regex {}s", rules.rules.len(), d1, d2); Ok(RulesDatabase { rules, + #[cfg(feature = "hyperscan")] hsdb, anchored_regexes, }) @@ -71,6 +78,7 @@ impl RulesDatabase { } #[cfg(test)] +#[cfg(feature = "hyperscan")] mod test { use super::*; use pretty_assertions::assert_eq; From c61615e6d9424f9d62e7211a0de94ac3c08359ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Dec 2022 11:43:24 +0100 Subject: [PATCH 2/3] use gitoxide in the simplest possible way to extract blob data This doesn't speed up the enumeration phase (which might not be necessary) but shows how much faster gitoxide can be if used like that. --- Cargo.lock | 1047 +++++++++++++++++++++++++++++-- Cargo.toml | 1 + src/bin/noseyparker/cmd_scan.rs | 16 +- src/blob_id.rs | 11 + src/input_enumerator.rs | 17 +- src/match_type.rs | 4 +- 6 files changed, 1037 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f21591812..11046fb02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "ahash" version = "0.7.6" @@ -13,6 +19,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.19" @@ -34,12 +52,33 @@ version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +[[package]] +name = "arc-swap" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "983cd8b9d4b02a6dc6ffa557262eb5858a27a0038ffffe21a0f133eaa819a164" + [[package]] name = "arrayvec" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atty" version = "0.2.14" @@ -99,6 +138,27 @@ dependencies = [ "serde", ] +[[package]] +name = "bstr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca0852af221f458706eb0725c03e4ed6c46af9ac98e6a689d5e634215d594dd" +dependencies = [ + "memchr", + "once_cell", + "regex-automata", + "serde", +] + +[[package]] +name = "btoi" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c0869a9faa81f8bbf8102371105d6d0a7b79167a04c340b04ab16892246a11" +dependencies = [ + "num-traits", +] + [[package]] name = "bumpalo" version = "3.11.1" @@ -111,6 +171,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytesize" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70" + [[package]] name = "cargo-emit" version = "0.2.1" @@ -123,6 +189,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.0.77" @@ -226,6 +301,32 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "clru" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" + +[[package]] +name = "cmake" +version = "0.1.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c" +dependencies = [ + "cc", +] + +[[package]] +name = "compact_str" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5138945395949e7dfba09646dc9e766b548ff48e23deb5246890e6b64ae9e1b9" +dependencies = [ + "castaway", + "itoa 1.0.4", + "ryu", +] + [[package]] name = "console" version = "0.15.2" @@ -255,6 +356,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.4.0" @@ -291,6 +401,20 @@ dependencies = [ "itertools", ] +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -325,6 +449,16 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.13" @@ -350,7 +484,7 @@ version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ - "bstr", + "bstr 0.2.17", "csv-core", "itoa 0.4.8", "ryu", @@ -376,6 +510,19 @@ dependencies = [ "syn", ] +[[package]] +name = "dashmap" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +dependencies = [ + "cfg-if", + "hashbrown 0.12.3", + "lock_api", + "once_cell", + "parking_lot_core 0.9.5", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -405,6 +552,15 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -415,6 +571,17 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -486,6 +653,29 @@ dependencies = [ "instant", ] +[[package]] +name = "filetime" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys", +] + +[[package]] +name = "flate2" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -507,61 +697,502 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ - "foreign-types-macros", - "foreign-types-shared 0.3.1", + "foreign-types-macros", + "foreign-types-shared 0.3.1", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8469d0d40519bc608ec6863f1cc88f3f1deee15913f2f3b3e573d81ed38cccc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + +[[package]] +name = "form_urlencoded" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "generic-array" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "git-actor" +version = "0.14.1" +dependencies = [ + "bstr 1.0.1", + "btoi", + "git-date", + "itoa 1.0.4", + "nom", + "quick-error 2.0.1", +] + +[[package]] +name = "git-attributes" +version = "0.6.0" +dependencies = [ + "bstr 1.0.1", + "compact_str", + "git-features", + "git-glob", + "git-path", + "git-quote", + "thiserror", + "unicode-bom", +] + +[[package]] +name = "git-bitmap" +version = "0.2.0" +dependencies = [ + "quick-error 2.0.1", +] + +[[package]] +name = "git-chunk" +version = "0.4.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "git-command" +version = "0.2.0" +dependencies = [ + "bstr 1.0.1", +] + +[[package]] +name = "git-config" +version = "0.12.0" +dependencies = [ + "bstr 1.0.1", + "git-config-value", + "git-features", + "git-glob", + "git-path", + "git-ref", + "git-sec", + "memchr", + "nom", + "once_cell", + "smallvec", + "thiserror", + "unicode-bom", +] + +[[package]] +name = "git-config-value" +version = "0.9.0" +dependencies = [ + "bitflags", + "bstr 1.0.1", + "git-path", + "libc", + "thiserror", +] + +[[package]] +name = "git-credentials" +version = "0.7.0" +dependencies = [ + "bstr 1.0.1", + "git-command", + "git-config-value", + "git-path", + "git-prompt", + "git-sec", + "git-url", + "thiserror", +] + +[[package]] +name = "git-date" +version = "0.3.0" +dependencies = [ + "bstr 1.0.1", + "itoa 1.0.4", + "thiserror", + "time", +] + +[[package]] +name = "git-diff" +version = "0.23.0" +dependencies = [ + "git-hash", + "git-object", + "imara-diff", + "thiserror", +] + +[[package]] +name = "git-discover" +version = "0.9.0" +dependencies = [ + "bstr 1.0.1", + "git-hash", + "git-path", + "git-ref", + "git-sec", + "thiserror", +] + +[[package]] +name = "git-features" +version = "0.24.1" +dependencies = [ + "crc32fast", + "crossbeam-channel", + "crossbeam-utils", + "flate2", + "git-hash", + "jwalk", + "libc", + "num_cpus", + "once_cell", + "parking_lot 0.12.1", + "prodash", + "quick-error 2.0.1", + "sha1", + "sha1_smol", + "walkdir", +] + +[[package]] +name = "git-glob" +version = "0.5.0" +dependencies = [ + "bitflags", + "bstr 1.0.1", +] + +[[package]] +name = "git-hash" +version = "0.10.1" +dependencies = [ + "hex", + "thiserror", +] + +[[package]] +name = "git-hashtable" +version = "0.1.0" +dependencies = [ + "git-hash", + "hashbrown 0.13.1", +] + +[[package]] +name = "git-index" +version = "0.9.1" +dependencies = [ + "atoi", + "bitflags", + "bstr 1.0.1", + "filetime", + "git-bitmap", + "git-features", + "git-hash", + "git-lock", + "git-object", + "git-traverse", + "itoa 1.0.4", + "memmap2", + "smallvec", + "thiserror", +] + +[[package]] +name = "git-lock" +version = "3.0.0" +dependencies = [ + "fastrand", + "git-tempfile", + "quick-error 2.0.1", +] + +[[package]] +name = "git-mailmap" +version = "0.6.0" +dependencies = [ + "bstr 1.0.1", + "git-actor", + "quick-error 2.0.1", +] + +[[package]] +name = "git-object" +version = "0.23.0" +dependencies = [ + "bstr 1.0.1", + "btoi", + "git-actor", + "git-features", + "git-hash", + "git-validate", + "hex", + "itoa 1.0.4", + "nom", + "smallvec", + "thiserror", +] + +[[package]] +name = "git-odb" +version = "0.37.0" +dependencies = [ + "arc-swap", + "git-features", + "git-hash", + "git-object", + "git-pack", + "git-path", + "git-quote", + "parking_lot 0.12.1", + "tempfile", + "thiserror", +] + +[[package]] +name = "git-pack" +version = "0.27.0" +dependencies = [ + "bytesize", + "clru", + "dashmap", + "git-chunk", + "git-diff", + "git-features", + "git-hash", + "git-hashtable", + "git-object", + "git-path", + "git-tempfile", + "git-traverse", + "memmap2", + "parking_lot 0.12.1", + "smallvec", + "thiserror", + "uluru", +] + +[[package]] +name = "git-path" +version = "0.6.0" +dependencies = [ + "bstr 1.0.1", + "thiserror", +] + +[[package]] +name = "git-prompt" +version = "0.2.0" +dependencies = [ + "git-command", + "git-config-value", + "nix", + "parking_lot 0.12.1", + "thiserror", +] + +[[package]] +name = "git-quote" +version = "0.4.0" +dependencies = [ + "bstr 1.0.1", + "btoi", + "quick-error 2.0.1", +] + +[[package]] +name = "git-ref" +version = "0.20.0" +dependencies = [ + "git-actor", + "git-features", + "git-hash", + "git-lock", + "git-object", + "git-path", + "git-tempfile", + "git-validate", + "memmap2", + "nom", + "thiserror", +] + +[[package]] +name = "git-refspec" +version = "0.4.0" +dependencies = [ + "bstr 1.0.1", + "git-hash", + "git-revision", + "git-validate", + "smallvec", + "thiserror", +] + +[[package]] +name = "git-repository" +version = "0.29.0" +dependencies = [ + "git-actor", + "git-attributes", + "git-config", + "git-credentials", + "git-date", + "git-diff", + "git-discover", + "git-features", + "git-glob", + "git-hash", + "git-hashtable", + "git-index", + "git-lock", + "git-mailmap", + "git-object", + "git-odb", + "git-pack", + "git-path", + "git-prompt", + "git-ref", + "git-refspec", + "git-revision", + "git-sec", + "git-tempfile", + "git-traverse", + "git-url", + "git-validate", + "git-worktree", + "log", + "once_cell", + "prodash", + "signal-hook", + "smallvec", + "thiserror", + "unicode-normalization", +] + +[[package]] +name = "git-revision" +version = "0.7.0" +dependencies = [ + "bstr 1.0.1", + "git-date", + "git-hash", + "git-hashtable", + "git-object", + "thiserror", ] [[package]] -name = "foreign-types-macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8469d0d40519bc608ec6863f1cc88f3f1deee15913f2f3b3e573d81ed38cccc" +name = "git-sec" +version = "0.5.0" dependencies = [ - "proc-macro2", - "quote", - "syn", + "bitflags", + "dirs", + "git-path", + "libc", + "windows", ] [[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +name = "git-tempfile" +version = "3.0.0" +dependencies = [ + "dashmap", + "libc", + "once_cell", + "signal-hook", + "signal-hook-registry", + "tempfile", +] [[package]] -name = "foreign-types-shared" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" +name = "git-traverse" +version = "0.19.0" +dependencies = [ + "git-hash", + "git-hashtable", + "git-object", + "thiserror", +] [[package]] -name = "form_urlencoded" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +name = "git-url" +version = "0.11.0" dependencies = [ - "percent-encoding", + "bstr 1.0.1", + "git-features", + "git-path", + "home", + "thiserror", + "url", ] [[package]] -name = "generic-array" -version = "0.14.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +name = "git-validate" +version = "0.7.0" dependencies = [ - "typenum", - "version_check", + "bstr 1.0.1", + "thiserror", ] [[package]] -name = "getrandom" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +name = "git-worktree" +version = "0.9.0" dependencies = [ - "cfg-if", - "libc", - "wasi", + "bstr 1.0.1", + "git-attributes", + "git-features", + "git-glob", + "git-hash", + "git-index", + "git-object", + "git-path", + "io-close", + "thiserror", ] [[package]] @@ -592,7 +1223,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" dependencies = [ "aho-corasick", - "bstr", + "bstr 0.2.17", "fnv", "log", "regex", @@ -610,16 +1241,22 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.6", ] +[[package]] +name = "hashbrown" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" + [[package]] name = "hashlink" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" dependencies = [ - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -652,6 +1289,21 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "home" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747309b4b440c06d57b0b25f2aee03ee9b5e5397d288c60e21fc709bb98a7408" +dependencies = [ + "winapi", +] + +[[package]] +name = "human_format" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86cce260d758a9aa3d7c4b99d55c815a540f8a37514ba6046ab6be402a157cb0" + [[package]] name = "hyperscan" version = "0.3.0" @@ -711,6 +1363,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "imara-diff" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" +dependencies = [ + "ahash 0.8.2", + "hashbrown 0.12.3", +] + [[package]] name = "include_dir" version = "0.7.3" @@ -744,7 +1406,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -776,6 +1438,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "io-close" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cadcf447f06744f8ce713d2d6239bb5bde2c357a452397a9ed90c625da390bc" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "io-lifetimes" version = "1.0.3" @@ -837,6 +1509,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jwalk" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2735847566356cd2179a2a38264839308f7079fa96e6bd5a42d740460e003c56" +dependencies = [ + "crossbeam", + "rayon", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -895,6 +1577,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" dependencies = [ "cc", + "cmake", "libc", "pkg-config", "vcpkg", @@ -906,6 +1589,16 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f" +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.17" @@ -930,6 +1623,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.7.1" @@ -939,6 +1641,43 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +dependencies = [ + "adler", +] + +[[package]] +name = "nix" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694" +dependencies = [ + "bitflags", + "cfg-if", + "libc", + "static_assertions", +] + +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "noseyparker" version = "0.10.0" @@ -947,6 +1686,7 @@ dependencies = [ "atty", "clap 4.0.29", "criterion", + "git-repository", "git2", "hex", "hyperscan", @@ -1006,6 +1746,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" @@ -1100,6 +1849,54 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.5", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ff9f3fef3968a3ec5945535ed654cb38ff72d7495a25619e2247fb15a2ed9ba" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + [[package]] name = "percent-encoding" version = "2.2.0" @@ -1217,6 +2014,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prodash" +version = "22.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38e2b91fcc982d0d8ae5e9d477561c73e09c24c5c19bac4858e202f6f065a13e" +dependencies = [ + "bytesize", + "dashmap", + "human_format", + "parking_lot 0.11.2", +] + [[package]] name = "proptest" version = "1.0.0" @@ -1516,8 +2325,24 @@ dependencies = [ "cfg-if", "cpufeatures", "digest", + "sha1-asm", ] +[[package]] +name = "sha1-asm" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563d4f7100bc3fce234e5f37bbf63dc2752558964505ba6ac3f7204bdc59eaac" +dependencies = [ + "cc", +] + +[[package]] +name = "sha1_smol" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" + [[package]] name = "sharded-slab" version = "0.1.4" @@ -1527,12 +2352,37 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "signal-hook" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +dependencies = [ + "libc", +] + [[package]] name = "smallvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strip-ansi-escapes" version = "0.1.1" @@ -1648,6 +2498,35 @@ dependencies = [ "once_cell", ] +[[package]] +name = "time" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +dependencies = [ + "itoa 1.0.4", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +dependencies = [ + "time-core", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -1737,6 +2616,15 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "uluru" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794a32261a1f5eb6a4462c81b59cec87b5c27d5deea7dd1ac8fc781c41d226db" +dependencies = [ + "arrayvec 0.7.2", +] + [[package]] name = "unicase" version = "2.6.0" @@ -1752,6 +2640,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +[[package]] +name = "unicode-bom" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" + [[package]] name = "unicode-ident" version = "1.0.5" @@ -1826,7 +2720,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6cbce692ab4ca2f1f3047fcf732430249c0e971bfdd2b234cf2c47ad93af5983" dependencies = [ - "arrayvec", + "arrayvec 0.5.2", "utf8parse", "vte_generate_state_changes", ] @@ -1962,57 +2856,114 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e30acc718a52fb130fec72b1cb5f55ffeeec9253e1b785e94db222178a6acaa1" +dependencies = [ + "windows_aarch64_gnullvm 0.40.0", + "windows_aarch64_msvc 0.40.0", + "windows_i686_gnu 0.40.0", + "windows_i686_msvc 0.40.0", + "windows_x86_64_gnu 0.40.0", + "windows_x86_64_gnullvm 0.40.0", + "windows_x86_64_msvc 0.40.0", +] + [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.0", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm 0.42.0", + "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3caa4a1a16561b714323ca6b0817403738583033a6a92e04c5d10d4ba37ca10" + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_msvc" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "328973c62dfcc50fb1aaa8e7100676e0b642fe56bac6bafff3327902db843ab4" + [[package]] name = "windows_aarch64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_i686_gnu" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa5b09fad70f0df85dea2ac2a525537e415e2bf63ee31cf9b8e263645ee9f3c1" + [[package]] name = "windows_i686_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_msvc" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a1ad4031c1a98491fa195d8d43d7489cb749f135f2e5c4eed58da094bd0d876" + [[package]] name = "windows_i686_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_x86_64_gnu" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520ff37edd72da8064b49d2281182898e17f0688ae9f4070bca27e4b5c162ac7" + [[package]] name = "windows_x86_64_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046e5b82215102c44fd75f488f1b9158973d02aa34d06ed85c23d6f5520a2853" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_msvc" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0c9c6df55dd1bfa76e131cef44bdd8ec9c819ef3611f04dfe453fd5bfeda28" + [[package]] name = "windows_x86_64_msvc" version = "0.42.0" diff --git a/Cargo.toml b/Cargo.toml index 00fc41b80..567020f9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ anyhow = { version = "1.0" } atty = "0.2" clap = { version = "4.0", features = ["cargo", "derive", "env", "unicode", "wrap_help"] } git2 = { version = "0.15", features = ["vendored-libgit2", "vendored-openssl"] } +git-repository = { version = "0.29.0", features = ["max-performance"] } libc = "0.2" libgit2-sys = "*" hex = "0.4" diff --git a/src/bin/noseyparker/cmd_scan.rs b/src/bin/noseyparker/cmd_scan.rs index f76bb12e7..2c37eeb33 100644 --- a/src/bin/noseyparker/cmd_scan.rs +++ b/src/bin/noseyparker/cmd_scan.rs @@ -5,6 +5,7 @@ use std::sync::mpsc; use std::sync::Mutex; use std::time::Instant; use tracing::{debug, debug_span, error}; +use git_repository as git; use crate::args; @@ -266,18 +267,18 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::ScanArgs) -> Result<()> // Scan Git repo inputs // --------------------------------------------------------------------------------------------- inputs.git_repos.par_iter().for_each(|git_repo_result| { + let repo = open_git_repo(&git_repo_result.path) + .ok() + .flatten() + .expect("should be able to re-open repository").into_sync(); git_repo_result .blobs .par_iter() .with_min_len(128) .for_each_init( || { - let repo = open_git_repo(&git_repo_result.path) - .ok() - .flatten() - .expect("should be able to re-open repository"); let matcher = make_matcher().expect("should be able to create a matcher"); - (repo, matcher, progress.clone()) + (repo.to_thread_local(), matcher, progress.clone()) }, |(repo, matcher, progress), (oid, size)| { progress.inc(*size); @@ -290,7 +291,7 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::ScanArgs) -> Result<()> if seen_blobs.contains(&blob_id) { return; } - let blob = match repo.find_blob(*oid) { + let blob = match repo.find_object(git::hash::ObjectId::from(oid.as_bytes())) { Err(e) => { error!( "Failed to read blob {} from Git repository at {:?}: {}", @@ -298,7 +299,8 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::ScanArgs) -> Result<()> ); return; } - Ok(blob) => Blob::new(blob_id, blob.content().to_owned()), + // TODO: get rid of this extra copy + Ok(blob) => Blob::new(blob_id, blob.data.to_owned()), }; let provenance = Provenance::FromGitRepo(path.to_path_buf()); match matcher.scan_blob(&blob, &provenance) { diff --git a/src/blob_id.rs b/src/blob_id.rs index a32b7a3db..6359e3949 100644 --- a/src/blob_id.rs +++ b/src/blob_id.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use git_repository as git; // ------------------------------------------------------------------------------------------------- // BlobId @@ -60,6 +61,16 @@ impl BlobId { } } +impl<'a> From<&'a git::ObjectId> for BlobId { + fn from(id: &'a git::ObjectId) -> Self { + BlobId( + id.as_bytes() + .try_into() + .expect("oid should be a 20-byte value"), + ) + } +} + impl std::fmt::Display for BlobId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.hex()) diff --git a/src/input_enumerator.rs b/src/input_enumerator.rs index 1dd3e3d5e..e83324736 100644 --- a/src/input_enumerator.rs +++ b/src/input_enumerator.rs @@ -1,5 +1,6 @@ use anyhow::{bail, Result}; use git2::{Oid, Repository, RepositoryOpenFlags}; +use git_repository as git; use ignore::{WalkBuilder, WalkState}; use std::ffi::OsStr; use std::path::{Path, PathBuf}; @@ -142,7 +143,7 @@ impl<'t> ignore::ParallelVisitor for Visitor<'t> { }); } } else if metadata.is_dir() { - match open_git_repo(path) { + match open_git2_repo(path) { Err(e) => { error!("Failed to open Git repository at {:?}: {}; skipping", path, e); return WalkState::Skip; @@ -256,7 +257,7 @@ impl FilesystemEnumerator { } /// Opens the given Git repository if it exists, returning None otherwise. -pub fn open_git_repo(path: &Path) -> Result> { +pub fn open_git2_repo(path: &Path) -> Result> { match Repository::open_ext( path, RepositoryOpenFlags::NO_SEARCH | RepositoryOpenFlags::NO_DOTGIT, // | RepositoryOpenFlags::BARE, @@ -270,6 +271,18 @@ pub fn open_git_repo(path: &Path) -> Result> { } } +/// Opens the given Git repository if it exists, returning None otherwise. +pub fn open_git_repo(path: &Path) -> Result> { + match git::open_opts( + path, + git::open::Options::isolated() + ) { + Err(git::open::Error::NotARepository{..}) => Ok(None), + Err(err) => Err(err.into()), + Ok(r) => Ok(Some(r)), + } +} + pub struct GitRepoEnumeratorResult { pub blobs: Vec<(Oid, u64)>, } diff --git a/src/match_type.rs b/src/match_type.rs index c028f292d..596cca7b4 100644 --- a/src/match_type.rs +++ b/src/match_type.rs @@ -45,9 +45,9 @@ pub struct Match { impl Match { #[inline] - pub fn new<'r, 'b>( + pub fn new( loc_mapping: &LocationMapping, - blob_match: BlobMatch<'r, 'b>, + blob_match: BlobMatch<'_, '_>, provenance: &Provenance, ) -> Vec { let offsets = &blob_match.matching_input_offset_span; From f69389c88935935284a4a0302c8064d105eb7449 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Dec 2022 17:25:06 +0100 Subject: [PATCH 3/3] Use `gitoxide` for enumeration as well. It's just single-threaded, but appears to be faster nonetheless. --- Cargo.lock | 120 +++++++++++++++++++++++++++++++--------- Cargo.toml | 2 +- src/input_enumerator.rs | 41 +++++++------- 3 files changed, 116 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 11046fb02..71de27a4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -756,7 +756,9 @@ dependencies = [ [[package]] name = "git-actor" -version = "0.14.1" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7def29b46f25f95a2e196323cfb336eae9965e0a3c7c35ad9506f295c3a8e234" dependencies = [ "bstr 1.0.1", "btoi", @@ -768,7 +770,9 @@ dependencies = [ [[package]] name = "git-attributes" -version = "0.6.0" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0affaed361598fdd06b2a184a566c823d0b5817b09f576018248fb267193a96" dependencies = [ "bstr 1.0.1", "compact_str", @@ -783,6 +787,8 @@ dependencies = [ [[package]] name = "git-bitmap" version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44304093ac66a0ada1b243c15c3a503a165a1d0f50bec748f4e5a9b84a0d0722" dependencies = [ "quick-error 2.0.1", ] @@ -790,6 +796,8 @@ dependencies = [ [[package]] name = "git-chunk" version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3090baa2f4a3fe488a9b3e31090b83259aaf930bf0634af34c18117274f8f1a8" dependencies = [ "thiserror", ] @@ -797,13 +805,17 @@ dependencies = [ [[package]] name = "git-command" version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6b98a6312fef79b326c0a6e15d576c2bd30f7f9d0b7964998d166049e0d7b9e" dependencies = [ "bstr 1.0.1", ] [[package]] name = "git-config" -version = "0.12.0" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ff189268cfb19d5151529ac30b6b708072ebfa1075643d785232675456ec320" dependencies = [ "bstr 1.0.1", "git-config-value", @@ -822,7 +834,9 @@ dependencies = [ [[package]] name = "git-config-value" -version = "0.9.0" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989a90c1c630513a153c685b4249b96fdf938afc75bf7ef2ae1ccbd3d799f5db" dependencies = [ "bitflags", "bstr 1.0.1", @@ -833,7 +847,9 @@ dependencies = [ [[package]] name = "git-credentials" -version = "0.7.0" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28da3d029be10258007699d002321a3b1ebe45e67b0e140a4cf464ba3ee79b32" dependencies = [ "bstr 1.0.1", "git-command", @@ -847,7 +863,9 @@ dependencies = [ [[package]] name = "git-date" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a2874ce2f3a77cb144167901ea830969e5c991eac7bfee85e6e3f53ef9fcdf2" dependencies = [ "bstr 1.0.1", "itoa 1.0.4", @@ -857,7 +875,9 @@ dependencies = [ [[package]] name = "git-diff" -version = "0.23.0" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f30011a43908645c492dfbea7b004e10528be6bd667bf5cdc12ff4297fe1e3c" dependencies = [ "git-hash", "git-object", @@ -867,7 +887,9 @@ dependencies = [ [[package]] name = "git-discover" -version = "0.9.0" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93c244b1cf7cf45501116e948506c25324e33ddc613f00557ff5bfded2132009" dependencies = [ "bstr 1.0.1", "git-hash", @@ -879,7 +901,9 @@ dependencies = [ [[package]] name = "git-features" -version = "0.24.1" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510591428bb22671eb60f56430975718af88fdae55a1489d403005f74c0d3c25" dependencies = [ "crc32fast", "crossbeam-channel", @@ -900,7 +924,9 @@ dependencies = [ [[package]] name = "git-glob" -version = "0.5.0" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3908404c9b76ac7b3f636a104142378d3eaa78623cbc6eb7c7f0651979d48e8a" dependencies = [ "bitflags", "bstr 1.0.1", @@ -909,6 +935,8 @@ dependencies = [ [[package]] name = "git-hash" version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1532d82bf830532f8d545c5b7b568e311e3593f16cf7ee9dd0ce03c74b12b99d" dependencies = [ "hex", "thiserror", @@ -917,6 +945,8 @@ dependencies = [ [[package]] name = "git-hashtable" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c52b625ad8cc360a0b7f426266f21fb07bd49b8f4ccf1b3ca7bc89424db1dec4" dependencies = [ "git-hash", "hashbrown 0.13.1", @@ -924,7 +954,9 @@ dependencies = [ [[package]] name = "git-index" -version = "0.9.1" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20627f71f3a884b0ae50f9f3abb3a07d9b117d06e16110d25b85da4d71d478c0" dependencies = [ "atoi", "bitflags", @@ -945,6 +977,8 @@ dependencies = [ [[package]] name = "git-lock" version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e4f05b8a68c3a5dd83a6651c76be384e910fe283072184fdab9d77f87ccec2" dependencies = [ "fastrand", "git-tempfile", @@ -953,7 +987,9 @@ dependencies = [ [[package]] name = "git-mailmap" -version = "0.6.0" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90e3ee2eaeebda8a12d17f4d99dff5b19d81536476020bcebb99ee121820466" dependencies = [ "bstr 1.0.1", "git-actor", @@ -962,7 +998,9 @@ dependencies = [ [[package]] name = "git-object" -version = "0.23.0" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b658f1e3e149d88cb3e0a2234be749bb0cab65887405975dbe6f3190cf6571" dependencies = [ "bstr 1.0.1", "btoi", @@ -979,7 +1017,9 @@ dependencies = [ [[package]] name = "git-odb" -version = "0.37.0" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a30a069e4c30d8aeabe41235f9a1595b60186a3cdfae73a7f3c89054e3e0d0ad" dependencies = [ "arc-swap", "git-features", @@ -995,7 +1035,9 @@ dependencies = [ [[package]] name = "git-pack" -version = "0.27.0" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed3c9af66949553af9795b9eac9d450a5bdceee9959352cda468997ddce0d2f" dependencies = [ "bytesize", "clru", @@ -1018,7 +1060,9 @@ dependencies = [ [[package]] name = "git-path" -version = "0.6.0" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e40e68481a06da243d3f4dfd86a4be39c24eefb535017a862e845140dcdb878a" dependencies = [ "bstr 1.0.1", "thiserror", @@ -1026,7 +1070,9 @@ dependencies = [ [[package]] name = "git-prompt" -version = "0.2.0" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3612a486e507dd431ef0f7108eeaafc8fd1ed7bd0f205a88554f6f91fe5dccbf" dependencies = [ "git-command", "git-config-value", @@ -1038,6 +1084,8 @@ dependencies = [ [[package]] name = "git-quote" version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd11f4e7f251ab297545faa4c5a4517f4985a43b9c16bf96fa49107f58e837f" dependencies = [ "bstr 1.0.1", "btoi", @@ -1046,7 +1094,9 @@ dependencies = [ [[package]] name = "git-ref" -version = "0.20.0" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c97b7d719e4320179fb64d081016e7faca56fed4a8ee4cf84e4697faad9235a3" dependencies = [ "git-actor", "git-features", @@ -1063,7 +1113,9 @@ dependencies = [ [[package]] name = "git-refspec" -version = "0.4.0" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d478e9db0956d60cd386d3348b5ec093e3ae613105a7a75ff6084b886254eba8" dependencies = [ "bstr 1.0.1", "git-hash", @@ -1075,7 +1127,9 @@ dependencies = [ [[package]] name = "git-repository" -version = "0.29.0" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f82f160ad03149996a5c74f02aa0879dc308c996ea8d0397aabdd8556866c17" dependencies = [ "git-actor", "git-attributes", @@ -1116,7 +1170,9 @@ dependencies = [ [[package]] name = "git-revision" -version = "0.7.0" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7516b1db551756b4d3176c4b7d18ccc4b79d35dcc5e74f768c90f5bb11bb6c9" dependencies = [ "bstr 1.0.1", "git-date", @@ -1128,7 +1184,9 @@ dependencies = [ [[package]] name = "git-sec" -version = "0.5.0" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1802e8252fa223b0ad89a393aed461132174ced1e6842a41f56dc92a3fc14f" dependencies = [ "bitflags", "dirs", @@ -1140,6 +1198,8 @@ dependencies = [ [[package]] name = "git-tempfile" version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6bb4dee86c8cae5a078cfaac3b004ef99c31548ed86218f23a7ff9b4b74f3be" dependencies = [ "dashmap", "libc", @@ -1151,7 +1211,9 @@ dependencies = [ [[package]] name = "git-traverse" -version = "0.19.0" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5141dde56d0c4861193c760e01fb61c7e03a32d0840ba93a0ac1c597588d4d" dependencies = [ "git-hash", "git-hashtable", @@ -1161,7 +1223,9 @@ dependencies = [ [[package]] name = "git-url" -version = "0.11.0" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6f4c5ba88ef911572dbf3c5c19212e72d1ce6dd924d933d7baa96a6d5f1cd6" dependencies = [ "bstr 1.0.1", "git-features", @@ -1173,7 +1237,9 @@ dependencies = [ [[package]] name = "git-validate" -version = "0.7.0" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431cf9352c596dc7c8ec9066ee551ce54e63c86c3c767e5baf763f6019ff3c2" dependencies = [ "bstr 1.0.1", "thiserror", @@ -1181,7 +1247,9 @@ dependencies = [ [[package]] name = "git-worktree" -version = "0.9.0" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17d748c54c3d904c914b987654a1416c7abe7cf048fdc83eeae69e6ac3d76f20" dependencies = [ "bstr 1.0.1", "git-attributes", diff --git a/Cargo.toml b/Cargo.toml index 567020f9d..f387e7b4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ anyhow = { version = "1.0" } atty = "0.2" clap = { version = "4.0", features = ["cargo", "derive", "env", "unicode", "wrap_help"] } git2 = { version = "0.15", features = ["vendored-libgit2", "vendored-openssl"] } -git-repository = { version = "0.29.0", features = ["max-performance"] } +git-repository = { version = "0.30.0", features = ["max-performance"] } libc = "0.2" libgit2-sys = "*" hex = "0.4" diff --git a/src/input_enumerator.rs b/src/input_enumerator.rs index e83324736..e4f3626c2 100644 --- a/src/input_enumerator.rs +++ b/src/input_enumerator.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use git2::{Oid, Repository, RepositoryOpenFlags}; use git_repository as git; use ignore::{WalkBuilder, WalkState}; @@ -143,7 +143,7 @@ impl<'t> ignore::ParallelVisitor for Visitor<'t> { }); } } else if metadata.is_dir() { - match open_git2_repo(path) { + match open_git_repo(path) { Err(e) => { error!("Failed to open Git repository at {:?}: {}; skipping", path, e); return WalkState::Skip; @@ -275,9 +275,13 @@ pub fn open_git2_repo(path: &Path) -> Result> { pub fn open_git_repo(path: &Path) -> Result> { match git::open_opts( path, - git::open::Options::isolated() + { + let mut opts = git::open::Options::isolated(); + opts.permissions.env.objects = git::sec::Permission::Allow; + opts + } ) { - Err(git::open::Error::NotARepository{..}) => Ok(None), + Err(git::open::Error::NotARepository{ .. }) => Ok(None), Err(err) => Err(err.into()), Ok(r) => Ok(Some(r)), } @@ -288,11 +292,11 @@ pub struct GitRepoEnumeratorResult { } pub struct GitRepoEnumerator<'a> { - repo: &'a Repository, + repo: &'a git::Repository, } impl<'a> GitRepoEnumerator<'a> { - pub fn new(repo: &'a Repository) -> Self { + pub fn new(repo: &'a git::Repository) -> Self { GitRepoEnumerator { repo } } @@ -306,29 +310,26 @@ impl<'a> GitRepoEnumerator<'a> { // } pub fn run(&self, progress: &mut Progress) -> Result { + use git::prelude::HeaderExt; let mut blobs: Vec<(Oid, u64)> = Vec::new(); - let odb = self.repo.odb()?; - odb.foreach(|oid: &git2::Oid| { - let (obj_size, obj_type) = match odb.read_header(*oid) { - Err(e) => { - error!("Failed to read object header {}: {}", oid, e); - return true; - } - Ok(v) => v, - }; + let odb = &self.repo.objects; + for oid in odb.iter()? + .with_ordering(git::odb::store::iter::Ordering::PackAscendingOffsetThenLooseLexicographical) + .filter_map(Result::ok) { + let hdr = odb.header(oid).with_context(|| format!("Failed to read object header {}", oid))?; + let obj_type = hdr.kind(); match obj_type { - git2::ObjectType::Blob => { - let obj_size = obj_size as u64; + git::object::Kind::Blob => { + let obj_size = hdr.size() as u64; progress.inc(obj_size); - blobs.push((*oid, obj_size)); + blobs.push((git2::Oid::from_bytes(oid.as_bytes())?, obj_size)); // let read_size = odb.read(*oid).unwrap().len(); // assert_eq!(obj_size, read_size); } _ => {} } - true - })?; + }; Ok(GitRepoEnumeratorResult { blobs }) }