diff --git a/Cargo.lock b/Cargo.lock index a5850e13..99e0b21a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,6 +43,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "aliasable" version = "0.1.3" @@ -404,11 +413,12 @@ dependencies = [ [[package]] name = "bstr" -version = "0.2.17" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", + "serde", ] [[package]] @@ -1064,23 +1074,17 @@ dependencies = [ "cynic-codegen", ] -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - [[package]] name = "globset" -version = "0.4.8" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "bstr", - "fnv", "log", - "regex", + "regex-automata 0.4.13", + "regex-syntax 0.8.5", ] [[package]] @@ -1535,7 +1539,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ - "regex-automata", + "regex-automata 0.1.10", ] [[package]] @@ -2211,7 +2215,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" dependencies = [ - "aho-corasick", + "aho-corasick 0.7.18", "memchr", "regex-syntax 0.6.27", ] @@ -2225,6 +2229,17 @@ dependencies = [ "regex-syntax 0.6.27", ] +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick 1.1.4", + "memchr", + "regex-syntax 0.8.5", +] + [[package]] name = "regex-syntax" version = "0.6.27" @@ -3186,7 +3201,7 @@ dependencies = [ "dotenvy", "futures", "github-graphql", - "glob", + "globset", "hex", "hmac", "hyper", diff --git a/Cargo.toml b/Cargo.toml index 7a935a9d..3011e7a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ anyhow = "1" hex = "0.4" parser = { path = "parser" } rust_team_data = { git = "https://github.com/rust-lang/team" } -glob = "0.3.0" toml = "0.8.20" axum = "0.8.4" hyper = { version = "1.6", features = ["server", "http1"] } @@ -53,6 +52,7 @@ pulldown-cmark-escape = "0.11.0" axum-extra = { version = "0.10.1", default-features = false } unicode-segmentation = "1.12.0" secrecy = { version = "0.10", features = ["serde"] } +globset = { version = "0.4.18", default-features = false } [dependencies.serde] version = "1" diff --git a/src/handlers/autolabel.rs b/src/handlers/autolabel.rs index 0b823597..62c515f2 100644 --- a/src/handlers/autolabel.rs +++ b/src/handlers/autolabel.rs @@ -56,24 +56,29 @@ pub(super) async fn parse_input( let mut to_remove = Vec::new(); 'outer: for (label, cfg) in &config.labels { - let exclude_patterns: Vec = cfg - .exclude_labels - .iter() - .filter_map(|label| match glob::Pattern::new(label) { - Ok(exclude_glob) => Some(exclude_glob), - Err(error) => { - log::error!("Invalid glob pattern: {error}"); - None - } - }) - .collect(); + let exclude_patterns = + cfg.exclude_labels + .iter() + .filter_map(|label| match globset::Glob::new(label) { + Ok(exclude_glob) => Some(exclude_glob), + Err(error) => { + log::error!("invalid glob pattern: {error}"); + None + } + }); + + let exclude_patterns = match globset::GlobSet::new(exclude_patterns) { + Ok(exclude_patterns) => exclude_patterns, + Err(err) => { + log::error!("failed to create a glob set: {err:?}"); + continue; + } + }; for label in event.issue.labels() { - for pat in &exclude_patterns { - if pat.matches(&label.name) { - // If we hit an excluded label, ignore this autolabel and check the next - continue 'outer; - } + if exclude_patterns.is_match(&label.name) { + // If we hit an excluded label, ignore this autolabel and check the next + continue 'outer; } } @@ -161,24 +166,30 @@ pub(super) async fn parse_input( let applied_label = &label.name; 'outer: for (label, config) in config.get_by_trigger(applied_label) { - let exclude_patterns: Vec = config - .exclude_labels - .iter() - .filter_map(|label| match glob::Pattern::new(label) { - Ok(exclude_glob) => Some(exclude_glob), - Err(error) => { - log::error!("Invalid glob pattern: {error}"); - None - } - }) - .collect(); + let exclude_patterns = + config + .exclude_labels + .iter() + .filter_map(|label| match globset::Glob::new(label) { + Ok(exclude_glob) => Some(exclude_glob), + Err(error) => { + log::error!("invalid glob pattern: {error}"); + None + } + }); + + let exclude_patterns = match globset::GlobSet::new(exclude_patterns) { + Ok(exclude_patterns) => exclude_patterns, + Err(err) => { + log::error!("failed to create a glob set: {err:?}"); + continue; + } + }; for label in event.issue.labels() { - for pat in &exclude_patterns { - if pat.matches(&label.name) { - // If we hit an excluded label, ignore this autolabel and check the next - continue 'outer; - } + if exclude_patterns.is_match(&label.name) { + // If we hit an excluded label, ignore this autolabel and check the next + continue 'outer; } } diff --git a/src/handlers/check_commits/validate_config.rs b/src/handlers/check_commits/validate_config.rs index 018e3069..c94b83df 100644 --- a/src/handlers/check_commits/validate_config.rs +++ b/src/handlers/check_commits/validate_config.rs @@ -2,7 +2,7 @@ //! changes are a valid configuration file. use crate::{ - config::CONFIG_FILE_NAME, + config::{CONFIG_FILE_NAME, MentionsEntryConfig, MentionsEntryType}, github::FileDiff, handlers::{Context, IssuesEvent}, }; @@ -68,6 +68,20 @@ pub(super) async fn validate_config( )); } + // Error if one the mentions entry is not a valid glob. + if let Some(mentions) = config.mentions { + for (entry, MentionsEntryConfig { type_, .. }) in mentions.entries { + if type_ == MentionsEntryType::Filename + && let Err(err) = globset::Glob::new(&entry) + { + return Ok(Some(format!( + "Invalid `triagebot.toml`:\n\ + `[mentions.\"{entry}\"]` has an invalid glob syntax: {err}" + ))); + } + } + } + Ok(None) } } diff --git a/src/handlers/mentions.rs b/src/handlers/mentions.rs index a9b4f754..60bdf3b5 100644 --- a/src/handlers/mentions.rs +++ b/src/handlers/mentions.rs @@ -13,7 +13,6 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::Path; use std::{fmt::Write, path::PathBuf}; -use tracing as log; const MENTIONS_KEY: &str = "mentions"; @@ -51,54 +50,59 @@ pub(super) async fn parse_input( return Ok(None); } - if let Some(files) = event - .issue - .diff(&ctx.github) - .await - .map_err(|e| log::error!("failed to fetch diff: {e:?}")) - .unwrap_or_default() - { - let file_paths: Vec<_> = files.iter().map(|fd| Path::new(&fd.filename)).collect(); - let to_mention: Vec<_> = config - .entries - .iter() - .filter_map(|(entry, MentionsEntryConfig { cc, type_, .. })| { - let relevant_file_paths: Vec = match type_ { - MentionsEntryType::Filename => { - let path = Path::new(entry); - // Only mention matching paths. - file_paths - .iter() - .filter(|p| p.starts_with(path)) - .map(PathBuf::from) - .collect() - } - MentionsEntryType::Content => { - // Only mentions byte-for-byte matching content inside the patch. - files - .iter() - .filter(|f| patch_adds(&f.patch, entry)) - .map(|f| PathBuf::from(&f.filename)) - .collect() - } - }; - // Don't mention if only the author is in the list. - let pings_non_author = match &cc[..] { - [only_cc] => only_cc.trim_start_matches('@') != event.issue.user.login, - _ => true, - }; - if !relevant_file_paths.is_empty() && pings_non_author { - Some((entry.to_string(), relevant_file_paths)) - } else { - None - } - }) - .collect(); - if !to_mention.is_empty() { - return Ok(Some(MentionsInput { to_mention })); + // Fetch the PR diff + let diff = event.issue.diff(&ctx.github).await; + + // Print the error if we got one + let Ok(Some(modified_files)) = diff else { + if let Err(err) = diff { + tracing::error!("failed to fetch diff for mentions handler: {err:?}"); } + return Ok(None); + }; + + let modified_paths: Vec<_> = modified_files + .iter() + .map(|fd| Path::new(&fd.filename)) + .collect(); + + let to_mention: Vec<_> = config + .entries + .iter() + .filter_map(|(entry, MentionsEntryConfig { cc, type_, .. })| { + let relevant_file_paths: Vec = match type_ { + MentionsEntryType::Filename => { + // Only mention matching paths. + modified_paths_matches(&modified_paths, entry) + } + MentionsEntryType::Content => { + // Only mentions byte-for-byte matching content inside the patch. + modified_files + .iter() + .filter(|f| patch_adds(&f.patch, entry)) + .map(|f| PathBuf::from(&f.filename)) + .collect() + } + }; + + // Don't mention if only the author is in the list. + let pings_non_author = match &cc[..] { + [only_cc] => only_cc.trim_start_matches('@') != event.issue.user.login, + _ => true, + }; + if !relevant_file_paths.is_empty() && pings_non_author { + Some((entry.to_string(), relevant_file_paths)) + } else { + None + } + }) + .collect(); + + if to_mention.is_empty() { + Ok(None) + } else { + Ok(Some(MentionsInput { to_mention })) } - Ok(None) } pub(super) async fn handle_input( @@ -154,6 +158,40 @@ pub(super) async fn handle_input( Ok(()) } +fn modified_paths_matches(modified_paths: &[&Path], entry: &str) -> Vec { + // Prepare the glob pattern from the entry. + // + // Note that we add an extra `*` at the end in order to get the "starts with" mode + // that we want. + let pattern = if entry.ends_with('*') { + entry.to_string() + } else { + format!("{entry}*") + }; + + // Create the glob pattern and log an error (should have already been reported to + // the user). + let pattern = match globset::GlobBuilder::new(&pattern) + .empty_alternates(true) + .build() + { + Ok(pattern) => pattern, + Err(err) => { + tracing::error!("invalid glob pattern for {entry}: {err:?}"); + return Vec::new(); + } + }; + + // Compile the glob pattern to a (Regex) matcher + let matcher = pattern.compile_matcher(); + + modified_paths + .iter() + .filter(|p| matcher.is_match(p)) + .map(PathBuf::from) + .collect() +} + fn patch_adds(patch: &str, needle: &str) -> bool { patch .lines() @@ -216,4 +254,155 @@ mod tests { "; assert!(!patch_adds(patch, "missing")); } + + #[test] + fn entry_not_modified() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("library/Cargo.lock"), + Path::new("library/Cargo.toml") + ], + "compiler/rustc_span/", + ), + Vec::::new() + ); + } + + #[test] + fn entry_modified() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("Cargo.lock"), + Path::new("compiler/rustc_span/src/lib.rs"), + Path::new("compiler/rustc_span/src/symbol.rs"), + ], + "compiler/rustc_span/", + ), + vec![ + PathBuf::from("compiler/rustc_span/src/lib.rs"), + PathBuf::from("compiler/rustc_span/src/symbol.rs"), + ] + ); + } + + #[test] + fn entry_filename_modified() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("compiler/rustc_span/src/lib.rs"), + Path::new("compiler/rustc_span/src/symbol.rs"), + ], + "compiler/rustc_span/src/lib.rs", + ), + vec![PathBuf::from("compiler/rustc_span/src/lib.rs")] + ); + } + + #[test] + fn entry_top_level_filename_modified() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("Cargo.lock"), + Path::new("Cargo.toml"), + Path::new(".git/submodules"), + ], + "Cargo.lock", + ), + vec![PathBuf::from("Cargo.lock")] + ); + } + + #[test] + fn entry_modified_glob_either() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("Cargo.lock"), + Path::new("Cargo.toml"), + Path::new("library/dec2flt/lib.rs"), + Path::new("library/flt2dec/lib.rs"), + ], + "library/{dec2flt,flt2dec}", + ), + vec![ + PathBuf::from("library/dec2flt/lib.rs"), + PathBuf::from("library/flt2dec/lib.rs"), + ] + ); + } + + #[test] + fn entry_modified_glob_star() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("Cargo.lock"), + Path::new("Cargo.toml"), + Path::new("library/dec2flt/lib.rs"), + Path::new("library/flt2dec/lib.rs"), + ], + "library/dec2*", + ), + vec![PathBuf::from("library/dec2flt/lib.rs")] + ); + } + + #[test] + fn entry_modified_glob_star_middle() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("compiler/x86-64-none_eabi.rs"), + Path::new("compiler/armv7-none_eabi-something.rs"), + Path::new("compiler/armv7-none_eabi-something.txt"), + Path::new("compiler/none_eabi.rs"), + ], + "compiler/*none_eabi*.rs", + ), + vec![ + PathBuf::from("compiler/x86-64-none_eabi.rs"), + PathBuf::from("compiler/armv7-none_eabi-something.rs"), + PathBuf::from("compiler/none_eabi.rs"), + ] + ); + } + + #[test] + fn entry_modified_glob_double_star() { + assert_eq!( + modified_paths_matches( + &[ + Path::new("Cargo.lock"), + Path::new("Cargo.toml"), + Path::new("library/.empty"), + Path::new("library/dec2flt/lib.rs"), + Path::new("library/flt2dec/lib.rs"), + ], + "library/**", + ), + vec![ + PathBuf::from("library/.empty"), + PathBuf::from("library/dec2flt/lib.rs"), + PathBuf::from("library/flt2dec/lib.rs"), + ] + ); + } + + #[test] + fn entry_modified_glob_empty_alternates() { + assert_eq!( + modified_paths_matches( + &[Path::new("result.rs"), Path::new("result.rs.stdout")], + "result.rs{,.stdout}", + ), + vec![ + PathBuf::from("result.rs"), + PathBuf::from("result.rs.stdout"), + ] + ); + } } diff --git a/src/handlers/notify_zulip.rs b/src/handlers/notify_zulip.rs index 6ee116f8..bd7b4570 100644 --- a/src/handlers/notify_zulip.rs +++ b/src/handlers/notify_zulip.rs @@ -161,14 +161,15 @@ fn parse_open_close_reopen_input( fn has_all_required_labels(issue: &Issue, config: &NotifyZulipLabelConfig) -> bool { for req_label in &config.required_labels { - let pattern = match glob::Pattern::new(req_label) { + let pattern = match globset::Glob::new(req_label) { Ok(pattern) => pattern, Err(err) => { log::error!("Invalid glob pattern: {err}"); continue; } }; - if !issue.labels().iter().any(|l| pattern.matches(&l.name)) { + let matcher = pattern.compile_matcher(); + if !issue.labels().iter().any(|l| matcher.is_match(&l.name)) { return false; } } diff --git a/src/handlers/relabel.rs b/src/handlers/relabel.rs index 257671e3..26f74450 100644 --- a/src/handlers/relabel.rs +++ b/src/handlers/relabel.rs @@ -148,13 +148,11 @@ fn match_pattern(pattern: &str, label: &str) -> anyhow::Result MatchPatternResult::Allow, (true, true) => MatchPatternResult::Deny, (false, _) => MatchPatternResult::NoMatch,