Skip to content

Commit

Permalink
output: add --stats flag
Browse files Browse the repository at this point in the history
This commit provides basic support for a --stats flag, which will print
various aggregate statistics about a search after all of the results
have been printed. This is mostly intended to support a similar feature
found in the Silver Searcher. Note though that we don't emit the total
bytes searched; this is a first pass at an implementation and we can
improve upon it later.

Closes #411, Closes #799
  • Loading branch information
balajisivaraman authored and BurntSushi committed Mar 10, 2018
1 parent 11a8f0e commit 00520b3
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 4 deletions.
1 change: 1 addition & 0 deletions complete/_rg
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ _rg() {
'(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
'(--type-list)*:file:_files'
'(-z --search-zip)'{-z,--search-zip}'[search in compressed files]'
"(--stats)--stats[print stats about this search]"
)

[[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {
Expand Down
20 changes: 20 additions & 0 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_search_zip(&mut args);
flag_smart_case(&mut args);
flag_sort_files(&mut args);
flag_stats(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_type(&mut args);
Expand Down Expand Up @@ -1488,6 +1489,25 @@ This flag can be disabled with --no-sort-files.
args.push(arg);
}

/// Registers the `--stats` switch, which asks ripgrep to print aggregate
/// statistics about the search after it completes.
fn flag_stats(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Print statistics about this ripgrep search.";
    const LONG: &str = long!("\
Print aggregate statistics about this ripgrep search. When this flag is
present, ripgrep will print the following stats to stdout at the end of the
search: number of matched lines, number of files with matches, number of files
searched, and the time taken for the entire search to complete.
This set of aggregate statistics may expand over time.
Note that this flag has no effect if --files, --files-with-matches or
--files-without-match is passed.");

    // Build the switch with its short and long help and register it in one go.
    args.push(RGArg::switch("stats").help(SHORT).long_help(LONG));
}

fn flag_text(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search binary files as if they were text.";
const LONG: &str = long!("\
Expand Down
25 changes: 23 additions & 2 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ pub struct Args {
type_list: bool,
types: Types,
with_filename: bool,
search_zip_files: bool
search_zip_files: bool,
stats: bool
}

impl Args {
Expand Down Expand Up @@ -221,6 +222,12 @@ impl Args {
self.max_count == Some(0)
}


/// Returns whether ripgrep should track aggregate search statistics for
/// this run.
///
/// This simply reports the value of the `stats` field stored on `Args`.
pub fn stats(&self) -> bool {
self.stats
}

/// Create a new writer for single-threaded searching with color support.
pub fn stdout(&self) -> termcolor::StandardStream {
termcolor::StandardStream::stdout(self.color_choice)
Expand Down Expand Up @@ -411,7 +418,8 @@ impl<'a> ArgMatches<'a> {
type_list: self.is_present("type-list"),
types: self.types()?,
with_filename: with_filename,
search_zip_files: self.is_present("search-zip")
search_zip_files: self.is_present("search-zip"),
stats: self.stats()
};
if args.mmap {
debug!("will try to use memory maps");
Expand Down Expand Up @@ -825,6 +833,19 @@ impl<'a> ArgMatches<'a> {
}
}

/// Returns whether stats should be tracked for this run of ripgrep.
///
/// This is automatically disabled if we're asked to only list the
/// files that will be searched, files with matches or files
/// without matches, even when `--stats` itself was given.
fn stats(&self) -> bool {
    // Keep this list in sync with the --stats help text, which promises
    // that the flag has no effect alongside any of these three flags.
    let lists_files_only = self.is_present("files")
        || self.is_present("files-with-matches")
        || self.is_present("files-without-match");
    !lists_files_only && self.is_present("stats")
}

/// Returns the approximate number of threads that ripgrep should use.
fn threads(&self) -> Result<usize> {
if self.is_present("sort-files") {
Expand Down
52 changes: 50 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc;
use std::thread;
use std::time::{Duration, Instant};

use args::Args;
use worker::Work;
Expand Down Expand Up @@ -85,16 +86,19 @@ fn run(args: Arc<Args>) -> Result<u64> {
}

fn run_parallel(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let bufwtr = Arc::new(args.buffer_writer());
let quiet_matched = args.quiet_matched();
let paths_searched = Arc::new(AtomicUsize::new(0));
let match_line_count = Arc::new(AtomicUsize::new(0));
let paths_matched = Arc::new(AtomicUsize::new(0));

args.walker_parallel().run(|| {
let args = Arc::clone(args);
let quiet_matched = quiet_matched.clone();
let paths_searched = paths_searched.clone();
let match_line_count = match_line_count.clone();
let paths_matched = paths_matched.clone();
let bufwtr = Arc::clone(&bufwtr);
let mut buf = bufwtr.buffer();
let mut worker = args.worker();
Expand Down Expand Up @@ -129,6 +133,9 @@ fn run_parallel(args: &Arc<Args>) -> Result<u64> {
if quiet_matched.set_match(count > 0) {
return Quit;
}
if args.stats() && count > 0 {
paths_matched.fetch_add(1, Ordering::SeqCst);
}
}
// BUG(burntsushi): We should handle this error instead of ignoring
// it. See: https://github.com/BurntSushi/ripgrep/issues/200
Expand All @@ -141,15 +148,28 @@ fn run_parallel(args: &Arc<Args>) -> Result<u64> {
eprint_nothing_searched();
}
}
Ok(match_line_count.load(Ordering::SeqCst) as u64)
let match_line_count = match_line_count.load(Ordering::SeqCst) as u64;
let paths_searched = paths_searched.load(Ordering::SeqCst) as u64;
let paths_matched = paths_matched.load(Ordering::SeqCst) as u64;
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
}

fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let stdout = args.stdout();
let mut stdout = stdout.lock();
let mut worker = args.worker();
let mut paths_searched: u64 = 0;
let mut match_line_count = 0;
let mut paths_matched: u64 = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
Expand All @@ -170,18 +190,30 @@ fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
}
}
paths_searched += 1;
match_line_count +=
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count += count;
if args.stats() && count > 0 {
paths_matched += 1;
}
}
if !args.paths().is_empty() && paths_searched == 0 {
if !args.no_messages() {
eprint_nothing_searched();
}
}
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
}

Expand Down Expand Up @@ -373,6 +405,22 @@ fn eprint_nothing_searched() {
Try running again with --debug.");
}

/// Prints the aggregate search summary to stdout.
///
/// The output is a blank line followed by four lines: the number of matched
/// lines, the number of files that contained matches, the number of files
/// searched, and the elapsed time in seconds with three decimal places.
fn print_stats(
    match_count: u64,
    paths_searched: u64,
    paths_matched: u64,
    time_elapsed: Duration,
) {
    // Combine the whole-second and sub-second parts into fractional seconds.
    let whole_secs = time_elapsed.as_secs() as f64;
    let fractional_secs = time_elapsed.subsec_nanos() as f64 * 1e-9;
    let seconds = whole_secs + fractional_secs;

    // A single println! keeps the whole summary in one write to stdout.
    println!(
        concat!(
            "\n{} matched lines\n",
            "{} files contained matches\n",
            "{} files searched\n",
            "{:.3} seconds"
        ),
        match_count, paths_matched, paths_searched, seconds
    );
}

// The Rust standard library suppresses the default SIGPIPE behavior, so that
// writing to a closed pipe doesn't kill the process. The goal is to instead
// handle errors through the normal result mechanism. Ripgrep needs some
Expand Down
44 changes: 44 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,50 @@ be, to a very large extent, the result of luck. Sherlock Holmes
assert_eq!(lines, expected);
});

// Checks that --stats appends the aggregate summary for the sherlock
// fixture: 2 matched lines, 1 file with matches, 1 file searched, and a
// timing line.
sherlock!(feature_411_single_threaded_search_stats,
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    let expected_fragments = [
        "2 matched lines",
        "1 files contained matches",
        "1 files searched",
        "seconds",
    ];
    for &expected in &expected_fragments {
        // Include the captured output so a failure is diagnosable.
        assert!(
            lines.contains(expected),
            "expected {:?} in --stats output, got:\n{}",
            expected, lines
        );
    }
});

// Checks the --stats summary when two copies of the sherlock haystack are
// searched: counts must be aggregated across both files.
#[test]
fn feature_411_parallel_search_stats() {
    let wd = WorkDir::new("feature_411");
    wd.create("sherlock_1", hay::SHERLOCK);
    wd.create("sherlock_2", hay::SHERLOCK);

    let mut cmd = wd.command();
    cmd.arg("--stats");
    cmd.arg("Sherlock");

    let lines: String = wd.stdout(&mut cmd);
    let expected_fragments = [
        "4 matched lines",
        "2 files contained matches",
        "2 files searched",
        "seconds",
    ];
    for &expected in &expected_fragments {
        // Include the captured output so a failure is diagnosable.
        assert!(
            lines.contains(expected),
            "expected {:?} in --stats output, got:\n{}",
            expected, lines
        );
    }
}

// --stats must be ignored when --files-with-matches is given: no timing
// line may appear in the output.
sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--files-with-matches");
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    assert!(
        !lines.contains("seconds"),
        "stats were printed despite --files-with-matches:\n{}",
        lines
    );
});

// --stats must be ignored when --files-without-match is given: no timing
// line may appear in the output.
sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--files-without-match");
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    assert!(
        !lines.contains("seconds"),
        "stats were printed despite --files-without-match:\n{}",
        lines
    );
});

#[test]
fn feature_740_passthru() {
let wd = WorkDir::new("feature_740");
Expand Down

0 comments on commit 00520b3

Please sign in to comment.