diff --git a/Cargo.lock b/Cargo.lock index d7595050a6..f50b32c142 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5029,6 +5029,7 @@ name = "mise" version = "2026.1.12" dependencies = [ "age", + "aho-corasick", "anyhow", "aqua-registry", "async-backtrace", diff --git a/Cargo.toml b/Cargo.toml index f7a8b0667f..ee7373f4dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,7 @@ opt-level = 3 [dependencies] age = { version = "0.11", features = ["ssh"] } +aho-corasick = "1" anyhow = "1" async-backtrace = "0.2" async-trait = "0.1" diff --git a/src/cmd.rs b/src/cmd.rs index ee4002f9f2..856e932789 100644 --- a/src/cmd.rs +++ b/src/cmd.rs @@ -8,10 +8,10 @@ use std::sync::mpsc::channel; use std::sync::{Arc, Mutex, RwLock}; use std::thread; +use crate::redactions::Redactor; use color_eyre::Result; use duct::{Expression, IntoExecutablePath}; use eyre::Context; -use indexmap::IndexSet; #[cfg(not(any(test, target_os = "windows")))] use signal_hook::consts::{SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2}; #[cfg(not(any(test, target_os = "windows")))] @@ -102,7 +102,7 @@ pub struct CmdLineRunner<'a> { pr: Option<&'a dyn SingleReport>, pr_arc: Option>>, stdin: Option, - redactions: IndexSet, + redactor: Redactor, raw: bool, pass_signals: bool, on_stdout: Option>, @@ -125,7 +125,7 @@ impl<'a> CmdLineRunner<'a> { pr: None, pr_arc: None, stdin: None, - redactions: Default::default(), + redactor: Default::default(), raw: false, pass_signals: false, on_stdout: None, @@ -177,11 +177,7 @@ impl<'a> CmdLineRunner<'a> { } pub fn redact(mut self, redactions: impl IntoIterator) -> Self { - for r in redactions { - if !r.is_empty() { - self.redactions.insert(r); - } - } + self.redactor = self.redactor.with_additional(redactions); self } @@ -380,18 +376,12 @@ impl<'a> CmdLineRunner<'a> { for line in rx { match line { ChildProcessOutput::Stdout(line) => { - let line = self - .redactions - .iter() - .fold(line, |acc, r| acc.replace(r, "[redacted]")); + let line = self.redactor.redact(&line); self.on_stdout(line.clone()); combined_output.push(line); } ChildProcessOutput::Stderr(line) => { - let line = self - .redactions - .iter() - .fold(line, |acc, r| acc.replace(r, "[redacted]")); + let line = self.redactor.redact(&line); self.on_stderr(line.clone()); combined_output.push(line); } diff --git a/src/config/mod.rs b/src/config/mod.rs index 5ce6fd16af..096d955dfa 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6,7 +6,6 @@ pub use settings::Settings; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fmt::{Debug, Formatter}; use std::iter::once; -use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::LazyLock as Lazy; use std::sync::{Arc, Mutex, RwLock}; @@ -44,6 +43,7 @@ use crate::env_diff::EnvMap; use crate::hook_env::WatchFilePattern; use crate::hooks::Hook; use crate::plugins::PluginType; +use crate::redactions::Redactor; use crate::tera::BASE_CONTEXT; use crate::watch_files::WatchFile; use crate::wildcard::Wildcard; @@ -79,7 +79,7 @@ pub struct Alias { } static _CONFIG: RwLock>> = RwLock::new(None); -static _REDACTIONS: Lazy>>> = Lazy::new(Default::default); +static _REDACTOR: Lazy> = Lazy::new(Default::default); pub fn is_loaded() -> bool { _CONFIG.read().unwrap().is_some() @@ -688,60 +688,25 @@ impl Config { .collect() } pub fn add_redactions(&self, redactions: impl IntoIterator, env: &EnvMap) { - let mut r = _REDACTIONS.lock().unwrap(); - let redactions = redactions.into_iter().flat_map(|r| { - let matcher = Wildcard::new(vec![r]); + let mut r = _REDACTOR.lock().unwrap(); + let new_redactions = redactions.into_iter().flat_map(|pattern| { + let matcher = Wildcard::new(vec![pattern]); env.iter() .filter(|(k, _)| matcher.match_any(k)) .map(|(_, v)| v.clone()) .collect::>() }); - *r = Arc::new(r.iter().cloned().chain(redactions).collect()); + *r = r.with_additional(new_redactions); } + /// Get the current redaction patterns. pub fn redactions(&self) -> Arc> { - let r = _REDACTIONS.lock().unwrap(); - r.deref().clone() - - // self.redactions.get_or_try_init(|| { - // let mut redactions = Redactions::default(); - // for cf in self.config_files.values() { - // let r = cf.redactions(); - // if !r.is_empty() { - // let mut r = r.clone(); - // let (tera, ctx) = self.tera(&cf.config_root()); - // r.render(&mut tera.clone(), &ctx)?; - // redactions.merge(r); - // } - // } - // if redactions.is_empty() { - // return Ok(Default::default()); - // } - // - // let ts = self.get_toolset()?; - // let env = ts.full_env()?; - // - // let env_matcher = Wildcard::new(redactions.env.clone()); - // let var_matcher = Wildcard::new(redactions.vars.clone()); - // - // let env_vals = env - // .into_iter() - // .filter(|(k, _)| env_matcher.match_any(k)) - // .map(|(_, v)| v); - // let var_vals = self - // .vars - // .iter() - // .filter(|(k, _)| var_matcher.match_any(k)) - // .map(|(_, v)| v.to_string()); - // Ok(env_vals.chain(var_vals).collect()) - // }) - } - - pub fn redact(&self, mut input: String) -> String { - for redaction in self.redactions().deref() { - input = input.replace(redaction, "[redacted]"); - } - input + _REDACTOR.lock().unwrap().patterns_arc() + } + + /// Redact sensitive values from a string using Aho-Corasick for efficiency. + pub fn redact(&self, input: &str) -> String { + _REDACTOR.lock().unwrap().redact(input) } } diff --git a/src/logger.rs b/src/logger.rs index a56e2d8266..1f5ccd8abb 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -23,18 +23,31 @@ impl log::Log for Logger { } fn log(&self, record: &Record) { - if record.level() <= self.file_level - && let Some(log_file) = &self.log_file - { + let term_level = *self.term_level.lock().unwrap(); + let will_log_file = record.level() <= self.file_level && self.log_file.is_some(); + let will_log_term = record.level() <= term_level; + + if !will_log_file && !will_log_term { + return; + } + + // Redact once for all outputs (Aho-Corasick makes this efficient) + let args = record.args().to_string(); + let args = if config::is_loaded() { + Config::get_().redact(&args) + } else { + args + }; + + if will_log_file && let Some(log_file) = &self.log_file { let mut log_file = log_file.lock().unwrap(); - let out = self.render(record, self.file_level); + let out = self.render(record, self.file_level, &args); if !out.is_empty() { let _ = writeln!(log_file, "{}", console::strip_ansi_codes(&out)); } } - let term_level = *self.term_level.lock().unwrap(); - if record.level() <= term_level { - let out = self.render(record, term_level); + if will_log_term { + let out = self.render(record, term_level, &args); if !out.is_empty() { ui::multi_progress_report::MultiProgressReport::suspend_if_active(|| { eprintln!("{out}"); @@ -66,12 +79,7 @@ impl Logger { logger } - fn render(&self, record: &Record, level: LevelFilter) -> String { - let mut args = record.args().to_string(); - if config::is_loaded() { - let config = Config::get_(); - args = config.redact(args); - } + fn render(&self, record: &Record, level: LevelFilter, args: &str) -> String { match level { LevelFilter::Off => "".to_string(), LevelFilter::Trace => { diff --git a/src/redactions.rs b/src/redactions.rs index fc952137fa..09d99d9f2b 100644 --- a/src/redactions.rs +++ b/src/redactions.rs @@ -1,4 +1,6 @@ +use aho_corasick::AhoCorasick; use indexmap::IndexSet; +use std::sync::Arc; #[derive(Default, Clone, Debug, serde::Deserialize)] pub struct Redactions(pub IndexSet); @@ -19,3 +21,141 @@ impl Redactions { self.0.is_empty() } } + +/// A redactor that uses Aho-Corasick for efficient multi-pattern string replacement. +/// +/// This is more efficient than iterating through patterns and calling `str::replace()` +/// for each one, especially when there are many patterns. Aho-Corasick finds all +/// matches in a single pass through the text - O(n + z) vs O(n * m). +#[derive(Clone)] +pub struct Redactor { + patterns: Arc>, + automaton: Option>, +} + +impl Default for Redactor { + fn default() -> Self { + Self { + patterns: Arc::new(IndexSet::new()), + automaton: None, + } + } +} + +impl Redactor { + /// Create a new redactor from a set of patterns to redact. + pub fn new(patterns: impl IntoIterator) -> Self { + let patterns: IndexSet = patterns.into_iter().filter(|p| !p.is_empty()).collect(); + let automaton = if patterns.is_empty() { + None + } else { + // Build the Aho-Corasick automaton - O(m) where m is total pattern length + AhoCorasick::new(patterns.iter()).ok().map(Arc::new) + }; + Self { + patterns: Arc::new(patterns), + automaton, + } + } + + /// Create a new redactor by adding more patterns to an existing one. + pub fn with_additional(&self, additional: impl IntoIterator) -> Self { + let mut patterns = (*self.patterns).clone(); + for p in additional { + if !p.is_empty() { + patterns.insert(p); + } + } + Self::new(patterns) + } + + /// Returns the patterns being redacted. + #[cfg_attr(not(test), allow(dead_code))] + pub fn patterns(&self) -> &IndexSet { + &self.patterns + } + + /// Returns the patterns as an Arc for efficient sharing. + pub fn patterns_arc(&self) -> Arc> { + Arc::clone(&self.patterns) + } + + /// Redact all matching patterns in the input string, replacing them with `[redacted]`. + /// + /// This is O(n + z) where n is the input length and z is the number of matches, + /// compared to O(n * m) for the naive approach of iterating through m patterns. + pub fn redact(&self, input: &str) -> String { + match &self.automaton { + Some(ac) => { + // Each pattern needs its own replacement string + let replacements: Vec<&str> = vec!["[redacted]"; self.patterns.len()]; + ac.replace_all(input, &replacements) + } + None if self.patterns.is_empty() => input.to_string(), + None => { + // Fallback to naive approach if automaton failed to build + let mut result = input.to_string(); + for pattern in self.patterns.iter() { + result = result.replace(pattern, "[redacted]"); + } + result + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_redactor() { + let r = Redactor::default(); + assert_eq!(r.redact("hello world"), "hello world"); + } + + #[test] + fn test_single_pattern() { + let r = Redactor::new(["secret".to_string()]); + assert_eq!(r.redact("my secret value"), "my [redacted] value"); + } + + #[test] + fn test_multiple_patterns() { + let r = Redactor::new(["secret".to_string(), "password".to_string()]); + assert_eq!( + r.redact("secret and password here"), + "[redacted] and [redacted] here" + ); + } + + #[test] + fn test_overlapping_patterns() { + let r = Redactor::new(["abc".to_string(), "bc".to_string()]); + let result = r.redact("abcd"); + // Should replace "abc" first, leaving "d" + assert_eq!(result, "[redacted]d"); + } + + #[test] + fn test_multiple_occurrences() { + let r = Redactor::new(["token".to_string()]); + assert_eq!(r.redact("token1 and token2"), "[redacted]1 and [redacted]2"); + } + + #[test] + fn test_with_additional() { + let r1 = Redactor::new(["secret".to_string()]); + let r2 = r1.with_additional(["password".to_string()]); + + assert_eq!(r1.redact("secret password"), "[redacted] password"); + assert_eq!(r2.redact("secret password"), "[redacted] [redacted]"); + } + + #[test] + fn test_empty_patterns_filtered() { + let r = Redactor::new(["".to_string(), "secret".to_string(), "".to_string()]); + assert_eq!(r.patterns().len(), 1); + assert_eq!(r.redact("my secret"), "my [redacted]"); + } +} diff --git a/src/task/task_executor.rs b/src/task/task_executor.rs index 435f2e492e..9a6c8ab8eb 100644 --- a/src/task/task_executor.rs +++ b/src/task/task_executor.rs @@ -464,7 +464,7 @@ impl TaskExecutor { let script = script.trim_start(); let cmd = format!("$ {script} {args}", args = args.join(" ")).to_string(); if !self.quiet(Some(task)) { - let msg = style::ebold(trunc(prefix, config.redact(cmd).trim())) + let msg = style::ebold(trunc(prefix, config.redact(&cmd).trim())) .bright() .to_string(); self.eprint(task, prefix, &msg) @@ -570,7 +570,7 @@ impl TaskExecutor { .trim() .to_string(); let cmd = style::ebold(format!("$ {cmd}")).bright().to_string(); - let cmd = trunc(prefix, config.redact(cmd).trim()); + let cmd = trunc(prefix, config.redact(&cmd).trim()); self.eprint(task, prefix, &cmd); }