diff --git a/README.md b/README.md index c7f1270859..5531f555b6 100644 --- a/README.md +++ b/README.md @@ -324,13 +324,29 @@ A fast, async link checker Finds broken URLs and mail addresses inside Markdown, HTML, `reStructuredText`, websites and more! -Usage: lychee [OPTIONS] ... +Usage: lychee [OPTIONS] [inputs]... Arguments: - ... - The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). NOTE: Use `--` to separate inputs from options that allow multiple arguments + [inputs]... + The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). Alternatively, use `--files-from` to read inputs from a file. NOTE: Use `--` to separate inputs from options that allow multiple arguments Options: + --files-from + Read input filenames from the given file or stdin (if path is '-'). + + This is useful when you have a large number of inputs that would be + cumbersome to specify on the command line directly. + + Examples: + lychee --files-from list.txt + find . -name '*.md' | lychee --files-from - + echo 'README.md' | lychee --files-from - + + File Format: + Each line should contain one input (file path, URL, or glob pattern). + Lines starting with '#' are treated as comments and ignored. + Empty lines are also ignored. + -c, --config Configuration file to use diff --git a/lychee-bin/src/files_from.rs b/lychee-bin/src/files_from.rs new file mode 100644 index 0000000000..4dc9cc12f4 --- /dev/null +++ b/lychee-bin/src/files_from.rs @@ -0,0 +1,131 @@ +//! File list reading functionality for --files-from option +//! +//! This module provides the `FilesFrom` struct which handles reading input file +//! lists from any reader, with support for comments and empty line filtering. + +use anyhow::{Context, Result}; +use std::io::{BufRead, BufReader, Read}; +use std::path::Path; + +/// Comment marker for ignoring lines in files-from input +const COMMENT_MARKER: &str = "#"; + +/// Represents a source of input file paths that can be read from any reader +#[derive(Debug, Clone)] +pub(crate) struct FilesFrom { + /// The list of input file paths + pub(crate) inputs: Vec, +} + +impl FilesFrom { + /// Create `FilesFrom` from any reader + pub(crate) fn from_reader(reader: R) -> Result { + let buf_reader = BufReader::new(reader); + let lines: Vec = buf_reader + .lines() + .collect::, _>>() + .context("Cannot read lines from reader")?; + + let inputs = Self::filter_lines(lines); + Ok(FilesFrom { inputs }) + } + + /// Filter out comments and empty lines from input + fn filter_lines(lines: Vec) -> Vec { + lines + .into_iter() + .filter(|line| { + let line = line.trim(); + !line.is_empty() && !line.starts_with(COMMENT_MARKER) + }) + .collect() + } +} + +impl TryFrom<&Path> for FilesFrom { + type Error = anyhow::Error; + + fn try_from(path: &Path) -> Result { + if path == Path::new("-") { + Self::from_reader(std::io::stdin()) + } else { + let file = std::fs::File::open(path) + .with_context(|| format!("Cannot open --files-from file: {}", path.display()))?; + Self::from_reader(file) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::io::Cursor; + use tempfile::tempdir; + + #[test] + fn test_filter_lines() { + let input = vec![ + "file1.md".to_string(), + String::new(), + "# This is a comment".to_string(), + "file2.md".to_string(), + " ".to_string(), + " # Another comment".to_string(), + "file3.md".to_string(), + ]; + + let result = FilesFrom::filter_lines(input); + assert_eq!(result, vec!["file1.md", "file2.md", "file3.md"]); + } + + #[test] + fn test_from_reader() -> Result<()> { + let input = "# Comment\nfile1.md\n\nfile2.md\n# Another comment\nfile3.md\n"; + let reader = Cursor::new(input); + + let files_from = FilesFrom::from_reader(reader)?; + assert_eq!(files_from.inputs, vec!["file1.md", "file2.md", "file3.md"]); + + Ok(()) + } + + #[test] + fn test_from_reader_empty() -> Result<()> { + let input = "# Only comments\n\n# More comments\n \n"; + let reader = Cursor::new(input); + + let files_from = FilesFrom::from_reader(reader)?; + assert_eq!(files_from.inputs, Vec::::new()); + + Ok(()) + } + + #[test] + fn test_try_from_file() -> Result<()> { + let temp_dir = tempdir()?; + let file_path = temp_dir.path().join("files.txt"); + + fs::write( + &file_path, + "# Comment\nfile1.md\n\nfile2.md\n# Another comment\nfile3.md\n", + )?; + + let files_from = FilesFrom::try_from(file_path.as_path())?; + assert_eq!(files_from.inputs, vec!["file1.md", "file2.md", "file3.md"]); + + Ok(()) + } + + #[test] + fn test_try_from_nonexistent_file() { + let result = FilesFrom::try_from(Path::new("/nonexistent/file.txt")); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Cannot open --files-from file") + ); + } +} diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 2dee326e9b..c814b0aa7c 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -84,6 +84,7 @@ use lychee_lib::CookieJar; mod cache; mod client; mod commands; +mod files_from; mod formatters; mod options; mod parse; diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 0ac5cae345..2afe2eb646 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -1,3 +1,4 @@ +use crate::files_from::FilesFrom; use crate::parse::parse_base; use crate::verbosity::Verbosity; use anyhow::{Context, Error, Result, anyhow}; @@ -312,10 +313,32 @@ pub(crate) struct LycheeOptions { /// The inputs (where to get links to check from). /// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), /// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). + /// Alternatively, use `--files-from` to read inputs from a file. /// NOTE: Use `--` to separate inputs from options that allow multiple arguments. - #[arg(name = "inputs", required = true)] + #[arg(name = "inputs", required_unless_present = "files_from")] raw_inputs: Vec, + /// Read input filenames from the given file or stdin (if path is '-'). + #[arg( + long = "files-from", + value_name = "PATH", + long_help = "Read input filenames from the given file or stdin (if path is '-'). + +This is useful when you have a large number of inputs that would be +cumbersome to specify on the command line directly. + +Examples: + lychee --files-from list.txt + find . -name '*.md' | lychee --files-from - + echo 'README.md' | lychee --files-from - + +File Format: + Each line should contain one input (file path, URL, or glob pattern). + Lines starting with '#' are treated as comments and ignored. + Empty lines are also ignored." + )] + files_from: Option, + /// Configuration file to use #[arg(short, long = "config")] #[arg(help = HELP_MSG_CONFIG_FILE)] @@ -331,7 +354,16 @@ impl LycheeOptions { // accept a `Vec` in `LycheeOptions` and do the conversion there, but // we wouldn't get access to `glob_ignore_case`. pub(crate) fn inputs(&self) -> Result> { - self.raw_inputs + let mut all_inputs = self.raw_inputs.clone(); + + // If --files-from is specified, read inputs from the file + if let Some(files_from_path) = &self.files_from { + let files_from = FilesFrom::try_from(files_from_path.as_path()) + .context("Cannot read inputs from --files-from")?; + all_inputs.extend(files_from.inputs); + } + + all_inputs .iter() .map(|raw_input| Input::new(raw_input, None, self.config.glob_ignore_case)) .collect::>() diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 6ff06d1895..9ad2db640c 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -2739,4 +2739,110 @@ mod cli { .stderr("") // Ensure stderr is empty .stdout(contains("https://example.com/sitemap.xml")); } + + #[test] + fn test_files_from_file() -> Result<()> { + let temp_dir = tempfile::tempdir()?; + let files_list_path = temp_dir.path().join("files.txt"); + let test_md = temp_dir.path().join("test.md"); + + // Create test files + fs::write(&test_md, "# Test\n[link](https://example.com)")?; + fs::write(&files_list_path, test_md.to_string_lossy().as_ref())?; + + let mut cmd = main_command(); + cmd.arg("--files-from") + .arg(&files_list_path) + .arg("--dump-inputs") + .assert() + .success() + .stdout(contains(test_md.to_string_lossy().as_ref())); + + Ok(()) + } + + #[test] + fn test_files_from_stdin() -> Result<()> { + let temp_dir = tempfile::tempdir()?; + let test_md = temp_dir.path().join("test.md"); + + // Create test file + fs::write(&test_md, "# Test\n[link](https://example.com)")?; + + let mut cmd = main_command(); + cmd.arg("--files-from") + .arg("-") + .arg("--dump-inputs") + .write_stdin(test_md.to_string_lossy().as_ref()) + .assert() + .success() + .stdout(contains(test_md.to_string_lossy().as_ref())); + + Ok(()) + } + + #[test] + fn test_files_from_with_comments_and_empty_lines() -> Result<()> { + let temp_dir = tempfile::tempdir()?; + let files_list_path = temp_dir.path().join("files.txt"); + let test_md = temp_dir.path().join("test.md"); + + // Create test files + fs::write(&test_md, "# Test\n[link](https://example.com)")?; + fs::write( + &files_list_path, + format!( + "# Comment line\n\n{}\n# Another comment\n", + test_md.display() + ), + )?; + + let mut cmd = main_command(); + cmd.arg("--files-from") + .arg(&files_list_path) + .arg("--dump-inputs") + .assert() + .success() + .stdout(contains(test_md.to_string_lossy().as_ref())); + + Ok(()) + } + + #[test] + fn test_files_from_combined_with_regular_inputs() -> Result<()> { + let temp_dir = tempfile::tempdir()?; + let files_list_path = temp_dir.path().join("files.txt"); + let test_md1 = temp_dir.path().join("test1.md"); + let test_md2 = temp_dir.path().join("test2.md"); + + // Create test files + fs::write(&test_md1, "# Test 1")?; + fs::write(&test_md2, "# Test 2")?; + fs::write(&files_list_path, test_md1.to_string_lossy().as_ref())?; + + let mut cmd = main_command(); + cmd.arg("--files-from") + .arg(&files_list_path) + .arg(&test_md2) // Regular input argument + .arg("--dump-inputs") + .assert() + .success() + .stdout(contains(test_md1.to_string_lossy().as_ref())) + .stdout(contains(test_md2.to_string_lossy().as_ref())); + + Ok(()) + } + + #[test] + fn test_files_from_nonexistent_file_error() -> Result<()> { + let mut cmd = main_command(); + cmd.arg("--files-from") + .arg("/nonexistent/file.txt") + .arg("--dump-inputs") + .assert() + .failure() + .stderr(contains("Cannot open --files-from file")); + + Ok(()) + } } diff --git a/lychee-bin/tests/usage.rs b/lychee-bin/tests/usage.rs index 7a1983d3c5..ce078745ce 100644 --- a/lychee-bin/tests/usage.rs +++ b/lychee-bin/tests/usage.rs @@ -5,7 +5,7 @@ mod readme { use assert_cmd::Command; use pretty_assertions::assert_eq; - const USAGE_STRING: &str = "Usage: lychee [OPTIONS] ...\n"; + const USAGE_STRING: &str = "Usage: lychee [OPTIONS] [inputs]...\n"; fn main_command() -> Command { // this gets the "main" binary name (e.g. `lychee`)