From 05d33166904ddd0d820115857012727e422e14e8 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Mon, 5 Feb 2024 11:08:40 +0100 Subject: [PATCH 1/5] Run cargo fmt --- src/builder.rs | 8 ++++---- src/lib.rs | 2 +- src/main.rs | 17 +++++++++++++---- tests/query.rs | 2 +- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 52a78ef..435dfcc 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -524,11 +524,11 @@ impl QueryBuilder { let mut result = if kind == "type_identifier" { "[ (type_identifier) (sized_type_specifier) (primitive_type)]".to_string() } else if kind == "identifier" && pattern.starts_with('$') { - if is_num_var(pattern) && parent!="declarator" { + if is_num_var(pattern) && parent != "declarator" { "(number_literal)".to_string() - } - else if self.cpp { - "[(identifier) (field_expression) (field_identifier) (qualified_identifier) (this)]".to_string() + } else if self.cpp { + "[(identifier) (field_expression) (field_identifier) (qualified_identifier) (this)]" + .to_string() } else { "[(identifier) (field_expression) (field_identifier)]".to_string() } diff --git a/src/lib.rs b/src/lib.rs index bc98744..ac7fbf2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,7 +59,7 @@ pub fn get_parser(cpp: bool) -> Parser { unsafe { tree_sitter_cpp() } }; - let mut parser = Parser::new(); + let mut parser = Parser::new(); if let Err(e) = parser.set_language(language) { eprintln!("{}", e); panic!(); diff --git a/src/main.rs b/src/main.rs index 1f3a495..09810a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -189,7 +189,9 @@ fn main() { s.spawn(move |_| execute_queries_worker(ast_rx, results_tx, w, &args)); if w.len() > 1 { - s.spawn(move |_| multi_query_worker(results_rx, w.len(), before, after, enable_line_numbers)); + s.spawn(move |_| { + multi_query_worker(results_rx, w.len(), before, after, enable_line_numbers) + }); } }); } @@ -265,6 +267,7 @@ fn iter_files(path: &Path, extensions: Vec) -> impl Iterator, @@ -381,7 +384,12 
@@ fn execute_queries_worker( "{}:{}\n{}", path.clone().bold(), line, - m.display(&source, args.before, args.after, args.enable_line_numbers) + m.display( + &source, + args.before, + args.after, + args.enable_line_numbers + ) ); } else { results_tx @@ -412,7 +420,7 @@ fn multi_query_worker( num_queries: usize, before: usize, after: usize, - enable_line_numbers: bool + enable_line_numbers: bool, ) { let mut query_results = Vec::with_capacity(num_queries); for _ in 0..num_queries { @@ -453,7 +461,8 @@ fn multi_query_worker( "{}:{}\n{}", r.path.bold(), line, - r.result.display(&r.source, before, after, enable_line_numbers) + r.result + .display(&r.source, before, after, enable_line_numbers) ); }) }); diff --git a/tests/query.rs b/tests/query.rs index a26d13c..be0f1ec 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -953,4 +953,4 @@ fn test_number2() { let matches = parse_and_match_cpp(needle, source); assert_eq!(matches, 1); -} \ No newline at end of file +} From 92d02a0d33eab21c3e81a381558fde4902b11c95 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Tue, 6 Feb 2024 19:15:09 +0100 Subject: [PATCH 2/5] Fix clippy warnings and a typo --- src/cli.rs | 4 ++-- src/main.rs | 17 +++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index ab931f5..51689f6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -178,8 +178,8 @@ pub fn parse_arguments() -> Args { let level = match matches.occurrences_of("v") { 0 => LevelFilter::Warn, - 1 => log::LevelFilter::Info, - _ => log::LevelFilter::Debug, + 1 => LevelFilter::Info, + _ => LevelFilter::Debug, }; let _ = SimpleLogger::init(level, Config::default()); diff --git a/src/main.rs b/src/main.rs index 09810a5..80a133d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,7 +71,7 @@ fn main() { // We also extract the identifiers at this point // to use them for file filtering later on. 
// Invalid patterns trigger a process exit in validate_query so - // after this point we now that all patterns are valid. + // after this point we know that all patterns are valid. // The loop also fills the `variables` set with used variable names. let work: Vec = args .pattern @@ -119,13 +119,10 @@ fn main() { v.iter() .map(|s| { let r = Regex::new(s); - match r { - Ok(regex) => regex, - Err(e) => { - eprintln!("Regex error {}", e); - std::process::exit(1) - } - } + r.unwrap_or_else(|e| { + eprintln!("Regex error {}", e); + std::process::exit(1) + }) }) .collect() }; @@ -138,7 +135,7 @@ fn main() { std::io::stdin() .lock() .lines() - .filter_map(|l| l.ok()) + .map_while(Result::ok) .map(|s| Path::new(&s).to_path_buf()) .collect() } else { @@ -304,7 +301,7 @@ fn parse_files_worker( let mut parser = tl .get_or(|| RefCell::new(weggli::get_parser(is_cpp))) .borrow_mut(); - let tree = parser.parse(&source.as_bytes(), None).unwrap(); + let tree = parser.parse(source.as_bytes(), None).unwrap(); Some((tree, source.to_string())) } }; From b2350f1230673a25a327d9e56d3e03a45e7c423a Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Mon, 5 Feb 2024 11:14:41 +0100 Subject: [PATCH 3/5] Fix alignment of -h/--help output Mostly caused by &str = "\NL .." removing the newline and any leading white space on the following line. ```diff @@ -16,1 -16,1 @@ -weggli 0.2.4 + weggli 0.2.4 Felix Wilhelm ARGS: - Search pattern. + Search pattern. A file or directory to search. [..] OPTIONS: - -A, --after Lines to print after a match. Default = 5. + -A, --after Lines to print after a match. Default = 5. -B, --before Lines to print before a match. Default = 5. [..] not: Negative sub queries. [..] - strict: Enable stricter matching. + strict: Enable stricter matching. [..] ``` and fix indentation of longer paragraphs and trailing white space.
--- src/cli.rs | 177 ++++++++++++++++++++++++++--------------------------- 1 file changed, 88 insertions(+), 89 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 51689f6..ae0b82f 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -264,109 +264,108 @@ mod help { pub const ABOUT: &str = "\ weggli is a semantic search tool for C and C++ codebases. It is designed to quickly find interesting code pattern in large codebases. - + Use -h for short descriptions and --help for more details. - + Homepage: https://github.com/weggli-rs/weggli"; - pub const TEMPLATE: &str = "\ - {bin} {version} + pub const TEMPLATE: &str = " {bin} {version} {author} - + {about} - + USAGE: {usage} - + ARGS: - {positionals} - +{positionals} + OPTIONS: - {unified}"; +{unified}"; pub const PATTERN: &str = "\ - A weggli search pattern. weggli's query language closely resembles - C and C++ with a small number of extra features. - - For example, the pattern '{_ $buf[_]; memcpy($buf,_,_);}' will - find all calls to memcpy that directly write into a stack buffer. - - Besides normal C and C++ constructs, weggli's query language - supports the following features: - - _ Wildcard. Will match on any AST node. - - $var Variables. Can be used to write queries that are independent - of identifiers. Variables match on identifiers, types, - field names or namespaces. The --unique option - optionally enforces that $x != $y != $z. The --regex option can - enforce that the variable has to match (or not match) a - regular expression. - - _(..) Subexpressions. The _(..) wildcard matches on arbitrary - sub expressions. This can be helpful if you are looking for some - operation involving a variable, but don't know more about it. - For example, _(test) will match on expressions like test+10, - buf[test->size] or f(g(&test)); - - not: Negative sub queries. Only show results that do not match the - following sub query. 
For example, '{not: $fv==NULL; not: $fv!=NULL *$v;}' - would find pointer dereferences that are not preceded by a NULL check. - -strict: Enable stricter matching. This turns off statement unwrapping and greedy - function name matching. For example 'strict: func();' will not match - on 'if (func() == 1)..' or 'a->func()' anymore. - - weggli automatically unwraps expression statements in the query source - to search for the inner expression instead. This means that the query `{func($x);}` - will match on `func(a);`, but also on `if (func(a)) {..}` or `return func(a)`. - Matching on `func(a)` will also match on `func(a,b,c)` or `func(z,a)`. - Similarly, `void func($t $param)` will also match function definitions - with multiple parameters. - - Additional patterns can be specified using the --pattern (-p) option. This makes - it possible to search across functions or type definitions. - "; +A weggli search pattern. weggli's query language closely resembles +C and C++ with a small number of extra features. + +For example, the pattern '{_ $buf[_]; memcpy($buf,_,_);}' will +find all calls to memcpy that directly write into a stack buffer. + +Besides normal C and C++ constructs, weggli's query language +supports the following features: + +_ Wildcard. Will match on any AST node. + +$var Variables. Can be used to write queries that are independent + of identifiers. Variables match on identifiers, types, + field names or namespaces. The --unique option + optionally enforces that $x != $y != $z. The --regex option can + enforce that the variable has to match (or not match) a + regular expression. + +_(..) Subexpressions. The _(..) wildcard matches on arbitrary + sub expressions. This can be helpful if you are looking for some + operation involving a variable, but don't know more about it. + For example, _(test) will match on expressions like test+10, + buf[test->size] or f(g(&test)); + +not: Negative sub queries. Only show results that do not match the + following sub query. 
For example, '{not: $fv==NULL; not: $fv!=NULL *$v;}' + would find pointer dereferences that are not preceded by a NULL check. + +strict: Enable stricter matching. This turns off statement unwrapping and greedy + function name matching. For example 'strict: func();' will not match + on 'if (func() == 1)..' or 'a->func()' anymore. + +weggli automatically unwraps expression statements in the query source +to search for the inner expression instead. This means that the query `{func($x);}` +will match on `func(a);`, but also on `if (func(a)) {..}` or `return func(a)`. +Matching on `func(a)` will also match on `func(a,b,c)` or `func(z,a)`. +Similarly, `void func($t $param)` will also match function definitions +with multiple parameters. + +Additional patterns can be specified using the --pattern (-p) option. This makes +it possible to search across functions or type definitions. +"; pub const PATH: &str = "\ - Input directory or file to search. By default, weggli will search inside - .c and .h files for the default C mode or .cc, .cpp, .cxx, .h and .hpp files when - executing in C++ mode (using the --cpp option). - Alternative file endings can be specified using the --extensions=h,c (-e) option. - - When combining weggli with other tools or preprocessing steps, - files can also be specified via STDIN by setting the directory to '-' - and piping a list of filenames. - "; +Input directory or file to search. By default, weggli will search inside +.c and .h files for the default C mode or .cc, .cpp, .cxx, .h and .hpp files when +executing in C++ mode (using the --cpp option). +Alternative file endings can be specified using the --extensions=h,c (-e) option. + +When combining weggli with other tools or preprocessing steps, +files can also be specified via STDIN by setting the directory to '-' +and piping a list of filenames. +"; pub const REGEX: &str = "\ - Filter variable matches based on a regular expression. 
- This feature uses the Rust regex crate, so most Perl-style - regular expression features are supported. - (see https://docs.rs/regex/1.5.4/regex/#syntax) - - Examples: - - Find calls to functions starting with the string 'mem': - weggli -R 'func=^mem' '$func(_);' - - Find memcpy calls where the last argument is NOT named 'size': - weggli -R 's!=^size$' 'memcpy(_,_,$s);' - "; +Filter variable matches based on a regular expression. +This feature uses the Rust regex crate, so most Perl-style +regular expression features are supported. +(see https://docs.rs/regex/1.5.4/regex/#syntax) + +Examples: + +Find calls to functions starting with the string 'mem': +weggli -R 'func=^mem' '$func(_);' + +Find memcpy calls where the last argument is NOT named 'size': +weggli -R 's!=^size$' 'memcpy(_,_,$s);' +"; pub const UNIQUE: &str = "\ - Enforce uniqueness of variable matches. - By default, two variables such as $a and $b can match on identical values. - For example, the query '$x=malloc($a); memcpy($x, _, $b);' would - match on both - - void *buf = malloc(size); - memcpy(buf, src, size); - - and - - void *buf = malloc(some_constant); - memcpy(buf, src, size); - - Using the unique flag would filter out the first match as $a==$b. - "; +Enforce uniqueness of variable matches. +By default, two variables such as $a and $b can match on identical values. +For example, the query '$x=malloc($a); memcpy($x, _, $b);' would +match on both + +void *buf = malloc(size); +memcpy(buf, src, size); + +and + +void *buf = malloc(some_constant); +memcpy(buf, src, size); + +Using the unique flag would filter out the first match as $a==$b. +"; } From 89f41a483411f294be39a41b96b6b402c26327c6 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Tue, 6 Feb 2024 18:16:24 +0100 Subject: [PATCH 4/5] Accept multiple PATH arguments Reading from stdin / '-' can be one of them. Plus when searching in '.' avoid the middle dot like '/path/./src/foo.c' when converting to an absolute path. 
Update README.md with new --help output and fix trailing whitespace. --- README.md | 167 ++++++++++++++++++++++++++-------------------------- src/cli.rs | 45 +++++++++++--- src/main.rs | 34 ++++++----- 3 files changed, 138 insertions(+), 108 deletions(-) diff --git a/README.md b/README.md index 959708c..da423ae 100644 --- a/README.md +++ b/README.md @@ -11,82 +11,80 @@ resembles C and C++ code, making it easy to turn interesting code patterns into weggli is inspired by great tools like [Semgrep](https://semgrep.dev/), [Coccinelle](https://coccinelle.gitlabpages.inria.fr/website/), [joern](https://joern.readthedocs.io/en/latest/) and [CodeQL](https://securitylab.github.com/tools/codeql), but makes some different design decisions: -- **C++ support**: weggli has first class support for modern C++ constructs, such as lambda expressions, range-based for loops and constexprs. +- **C++ support**: weggli has first class support for modern C++ constructs, such as lambda expressions, range-based for loops and constexprs. + +- **Minimal setup**: weggli should work *out-of-the box* against most software you will encounter. weggli does not require the ability to build the software and can work with incomplete sources or missing dependencies. -- **Minimal setup**: weggli should work *out-of-the box* against most software you will encounter. weggli does not require the ability to build the software and can work with incomplete sources or missing dependencies. - - **Interactive**: weggli is designed for interactive usage and fast query performance. Most of the time, a weggli query will be faster than a grep search. The goal is to enable an interactive workflow where quick switching between code review and query creation/improvement is possible. - -- **Greedy**: weggli's pattern matching is designed to find as many (useful) matches as possible for a specific query. While this increases the risk of false positives it simplifies query creation. 
For example, the query `$x = 10;` will match both assignment expressions (`foo = 10;`) and declarations (`int bar = 10;`). + +- **Greedy**: weggli's pattern matching is designed to find as many (useful) matches as possible for a specific query. While this increases the risk of false positives it simplifies query creation. For example, the query `$x = 10;` will match both assignment expressions (`foo = 10;`) and declarations (`int bar = 10;`). ## Usage ``` -Use -h for short descriptions and --help for more details. + Use -h for short descriptions and --help for more details. Homepage: https://github.com/weggli-rs/weggli - USAGE: weggli [OPTIONS] + USAGE: weggli [OPTIONS] ... ARGS: - + A weggli search pattern. weggli's query language closely resembles - C and C++ with a small number of extra features. - - For example, the pattern '{_ $buf[_]; memcpy($buf,_,_);}' will - find all calls to memcpy that directly write into a stack buffer. - - Besides normal C and C++ constructs, weggli's query language - supports the following features: - - _ Wildcard. Will match on any AST node. - - $var Variables. Can be used to write queries that are independent - of identifiers. Variables match on identifiers, types, - field names or namespaces. The --unique option - optionally enforces that $x != $y != $z. The --regex option can - enforce that the variable has to match (or not match) a - regular expression. - - _(..) Subexpressions. The _(..) wildcard matches on arbitrary - sub expressions. This can be helpful if you are looking for some - operation involving a variable, but don't know more about it. - For example, _(test) will match on expressions like test+10, - buf[test->size] or f(g(&test)); - - not: Negative sub queries. Only show results that do not match the - following sub query. For example, '{not: $fv==NULL; not: $fv!=NULL *$v;}' - would find pointer dereferences that are not preceded by a NULL check. - - strict: Enable stricter matching. 
This turns off statement unwrapping - and greedy function name matching. For example 'strict: func();' - will not match on 'if (func() == 1)..' or 'a->func()' anymore. - - weggli automatically unwraps expression statements in the query source - to search for the inner expression instead. This means that the query `{func($x);}` - will match on `func(a);`, but also on `if (func(a)) {..}` or `return func(a)`. - Matching on `func(a)` will also match on `func(a,b,c)` or `func(z,a)`. - Similarly, `void func($t $param)` will also match function definitions - with multiple parameters. - - Additional patterns can be specified using the --pattern (-p) option. This makes - it possible to search across functions or type definitions. - - + C and C++ with a small number of extra features. + + For example, the pattern '{_ $buf[_]; memcpy($buf,_,_);}' will + find all calls to memcpy that directly write into a stack buffer. + + Besides normal C and C++ constructs, weggli's query language + supports the following features: + + _ Wildcard. Will match on any AST node. + + $var Variables. Can be used to write queries that are independent + of identifiers. Variables match on identifiers, types, + field names or namespaces. The --unique option + optionally enforces that $x != $y != $z. The --regex option can + enforce that the variable has to match (or not match) a + regular expression. + + _(..) Subexpressions. The _(..) wildcard matches on arbitrary + sub expressions. This can be helpful if you are looking for some + operation involving a variable, but don't know more about it. + For example, _(test) will match on expressions like test+10, + buf[test->size] or f(g(&test)); + + not: Negative sub queries. Only show results that do not match the + following sub query. For example, '{not: $fv==NULL; not: $fv!=NULL *$v;}' + would find pointer dereferences that are not preceded by a NULL check. + + strict: Enable stricter matching. 
This turns off statement unwrapping and greedy + function name matching. For example 'strict: func();' will not match + on 'if (func() == 1)..' or 'a->func()' anymore. + + weggli automatically unwraps expression statements in the query source + to search for the inner expression instead. This means that the query `{func($x);}` + will match on `func(a);`, but also on `if (func(a)) {..}` or `return func(a)`. + Matching on `func(a)` will also match on `func(a,b,c)` or `func(z,a)`. + Similarly, `void func($t $param)` will also match function definitions + with multiple parameters. + + Additional patterns can be specified using the --pattern (-p) option. This makes + it possible to search across functions or type definitions. + ... Input directory or file to search. By default, weggli will search inside - .c and .h files for the default C mode or .cc, .cpp, .cxx, .h and .hpp files when - executing in C++ mode (using the --cpp option). - Alternative file endings can be specified using the --extensions (-e) option. - - When combining weggli with other tools or preprocessing steps, - files can also be specified via STDIN by setting the directory to '-' - and piping a list of filenames. + .c and .h files for the default C mode or .cc, .cpp, .cxx, .h and .hpp files when + executing in C++ mode (using the --cpp option). + Alternative file endings can be specified using the --extensions=h,c (-e) option. + When combining weggli with other tools or preprocessing steps, + files can also be specified via STDIN by setting the directory to '-' + and piping a list of filenames. OPTIONS: - -A, --after + -A, --after Lines to print after a match. Default = 5. -B, --before @@ -116,39 +114,40 @@ Use -h for short descriptions and --help for more details. -l, --limit Only show the first match in each function. + -n, --line-numbers + Enable line numbers + -p, --pattern

... Specify additional search patterns. -R, --regex ... Filter variable matches based on a regular expression. - This feature uses the Rust regex crate, so most Perl-style - regular expression features are supported. - (see https://docs.rs/regex/1.5.4/regex/#syntax) + This feature uses the Rust regex crate, so most Perl-style + regular expression features are supported. + (see https://docs.rs/regex/1.5.4/regex/#syntax) - Examples: + Examples: - Find calls to functions starting with the string 'mem': - weggli -R 'func=^mem' '$func(_);' - - Find memcpy calls where the last argument is NOT named 'size': - weggli -R 's!=^size$' 'memcpy(_,_,$s);' + Find calls to functions starting with the string 'mem': + weggli -R 'func=^mem' '$func(_);' + Find memcpy calls where the last argument is NOT named 'size': + weggli -R 's!=^size$' 'memcpy(_,_,$s);' -u, --unique Enforce uniqueness of variable matches. - By default, two variables such as $a and $b can match on identical values. - For example, the query '$x=malloc($a); memcpy($x, _, $b);' would - match on both - - void *buf = malloc(size); - memcpy(buf, src, size); + By default, two variables such as $a and $b can match on identical values. + For example, the query '$x=malloc($a); memcpy($x, _, $b);' would + match on both - and + void *buf = malloc(size); + memcpy(buf, src, size); - void *buf = malloc(some_constant); - memcpy(buf, src, size); + and - Using the unique flag would filter out the first match as $a==$b. + void *buf = malloc(some_constant); + memcpy(buf, src, size); + Using the unique flag would filter out the first match as $a==$b. -v, --verbose Sets the level of verbosity. @@ -192,8 +191,8 @@ $func(&$p); Potentially insecure WeakPtr usage: ```cpp weggli --cpp '{ -$x = _.GetWeakPtr(); -DCHECK($x); +$x = _.GetWeakPtr(); +DCHECK($x); $x->_;}' ./target/src ``` @@ -203,7 +202,7 @@ weggli -X 'DCHECK(_!=_.end());' ./target/src ``` Functions that perform writes into a stack-buffer based on -a function argument. 
+a function argument. ```c weggli '_ $fn(_ $limit) { _ $buf[_]; @@ -237,7 +236,7 @@ $ cargo install weggli ```sh # optional: install rust -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh git clone https://github.com/googleprojectzero/weggli.git cd weggli; cargo build --release @@ -249,8 +248,8 @@ cd weggli; cargo build --release Weggli is built on top of the [`tree-sitter`](https://tree-sitter.github.io/tree-sitter/) parsing library and its [`C`](https://github.com/tree-sitter/tree-sitter-c) and [`C++`](https://github.com/tree-sitter/tree-sitter-cpp) grammars. Search queries are first parsed using an extended version of the corresponding grammar, and the resulting `AST` is transformed into a set of tree-sitter queries -in `builder.rs`. -The actual query matching is implemented in `query.rs`, which is a relatively small wrapper around tree-sitter's query engine to add weggli specific features. +in `builder.rs`. +The actual query matching is implemented in `query.rs`, which is a relatively small wrapper around tree-sitter's query engine to add weggli specific features. ## Contributing @@ -266,5 +265,3 @@ Apache 2.0; see [`LICENSE`](LICENSE) for details. This project is not an official Google project. It is not supported by Google and Google specifically disclaims all warranties as to its quality, merchantability, or fitness for a particular purpose. - - diff --git a/src/cli.rs b/src/cli.rs index ae0b82f..692720d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,11 +15,15 @@ limitations under the License. 
*/ use clap::{App, Arg}; +use colored::Colorize; use simplelog::*; use std::path::{Path, PathBuf}; +pub const PATH_DASH_FOR_STDIN: &str = "-"; +const PATH_DOT: &str = "."; + pub struct Args { - pub path: PathBuf, + pub paths: Vec, pub pattern: Vec, pub before: usize, pub after: usize, @@ -67,6 +71,7 @@ pub fn parse_arguments() -> Args { Arg::with_name("PATH") .help("A file or directory to search.") .long_help(help::PATH) + .multiple(true) .required(true) .index(2), ) @@ -184,8 +189,6 @@ pub fn parse_arguments() -> Args { let _ = SimpleLogger::init(level, Config::default()); - let directory = Path::new(matches.value_of("PATH").unwrap_or(".")); - let mut pattern = vec![matches.value_of("PATTERN").unwrap().to_string()]; if let Some(p) = matches.values_of("p") { pattern.extend(p.map(|v| v.to_string())) @@ -193,11 +196,35 @@ pub fn parse_arguments() -> Args { let regexes = helper("regex"); - let path = if directory.is_absolute() || directory.to_string_lossy() == "-" { - directory.to_path_buf() - } else { - std::env::current_dir().unwrap().join(directory) - }; + let mut seen_dash_argument = false; + let paths = matches + .values_of("PATH") + .expect("argparser ensures presence") + .map(|path| { + let path = Path::new(path); + if path.is_absolute() { + path.to_path_buf() + } else if path == Path::new(PATH_DOT) { + std::env::current_dir().unwrap().clone() + } else if path == Path::new(PATH_DASH_FOR_STDIN) { + // Handle error here, before stdin is read the first time + if seen_dash_argument { + eprintln!( + "{}", + String::from( + "Argument '-' to read file list from STDIN can only be present once" + ) + .red() + ); + std::process::exit(1) + } + seen_dash_argument = true; + path.to_path_buf() + } else { + std::env::current_dir().unwrap().join(path) + } + }) + .collect(); let before = match matches.value_of("before") { Some(v) => v.parse().unwrap_or(5), @@ -243,7 +270,7 @@ pub fn parse_arguments() -> Args { let enable_line_numbers = matches.occurrences_of("line-numbers") > 
0; Args { - path, + paths, pattern, before, after, diff --git a/src/main.rs b/src/main.rs index 80a133d..28db0b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,11 +34,13 @@ use std::{io::prelude::*, path::PathBuf}; use thread_local::ThreadLocal; use tree_sitter::Tree; use walkdir::WalkDir; -use weggli::RegexMap; use weggli::parse_search_pattern; use weggli::query::QueryTree; use weggli::result::QueryResult; +use weggli::RegexMap; + +use cli::PATH_DASH_FOR_STDIN; mod cli; @@ -130,20 +132,24 @@ fn main() { let exclude_re = helper_regex(&args.exclude); let include_re = helper_regex(&args.include); - // Collect and filter our input file set. - let mut files: Vec = if args.path.to_string_lossy() == "-" { - std::io::stdin() - .lock() - .lines() - .map_while(Result::ok) - .map(|s| Path::new(&s).to_path_buf()) - .collect() - } else { - iter_files(&args.path, args.extensions.clone()) - .map(|d| d.into_path()) - .collect() - }; + // Collect files from input path(s) and/or stdin. + let mut files: Vec = Vec::new(); + args.paths.iter().for_each(|path| { + if path == Path::new(PATH_DASH_FOR_STDIN) { + std::io::stdin() + .lock() + .lines() + .map_while(Result::ok) + .map(|s| Path::new(&s).to_path_buf()) + .for_each(|p| files.push(p)); + } else { + iter_files(path, args.extensions.clone()) + .map(|d| d.into_path()) + .for_each(|p| files.push(p)); + } + }); + // Filter our input file set. if !exclude_re.is_empty() || !include_re.is_empty() { // Filter files based on include and exclude regexes files.retain(|f| { From a1df1cdd817bade22c5d921aab541e6e59158007 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Mon, 5 Feb 2024 11:24:30 +0100 Subject: [PATCH 5/5] Fix result truncation being off by one The final '}' in a block was truncated when no line numbers were used. 
--- src/result.rs | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/result.rs b/src/result.rs index 9b89975..2fb1925 100644 --- a/src/result.rs +++ b/src/result.rs @@ -262,7 +262,6 @@ impl<'a> DisplayHelper<'a> { fn display(&mut self, before: usize, after: usize, enable_line_numbers: bool) -> String { let mut result = String::new(); - let mut skipped = true; for i in self.first..self.last + 1 { if self.lines[i].2 != 1 { @@ -285,38 +284,37 @@ impl<'a> DisplayHelper<'a> { } } + const LINE_NR_WIDTH: usize = 4; + const NOTHING_SKIPPED: usize = usize::MAX; + let mut pre_skip_len = 0; + for (line_nr, (offset, l, p)) in self.lines.iter().enumerate() { if *p == 0 { - if !skipped { - skipped = true; + if pre_skip_len == NOTHING_SKIPPED { + pre_skip_len = result.len(); if enable_line_numbers { let length = (line_nr - 1).to_string().len(); - if length < 4 { - result += &" ".repeat(4 - length) + if length < LINE_NR_WIDTH { + result += &" ".repeat(LINE_NR_WIDTH - length) } result += &".".repeat(length); - result += "\n" + result += "\n"; } else { - result += "...\n" + result += "...\n"; } } continue; } if enable_line_numbers { - result += &format!("{:>4}: ", line_nr + 1); + result += &format!("{:>width$}: ", line_nr + 1, width = LINE_NR_WIDTH); } + result += &self.format(*offset, l, 0); - skipped = false; + pre_skip_len = NOTHING_SKIPPED; } - let t = if skipped { - 6 - } else { - 1 - }; - - result.truncate(result.len() - t); + result.truncate(std::cmp::min(result.len(), pre_skip_len).saturating_sub(1)); result }