Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,29 @@ A fast, async link checker

Finds broken URLs and mail addresses inside Markdown, HTML, `reStructuredText`, websites and more!

Usage: lychee [OPTIONS] <inputs>...
Usage: lychee [OPTIONS] [inputs]...

Arguments:
<inputs>...
The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). NOTE: Use `--` to separate inputs from options that allow multiple arguments
[inputs]...
The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`). Alternatively, use `--files-from` to read inputs from a file. NOTE: Use `--` to separate inputs from options that allow multiple arguments

Options:
--files-from <PATH>
Read input filenames from the given file or stdin (if path is '-').

This is useful when you have a large number of inputs that would be
cumbersome to specify on the command line directly.

Examples:
lychee --files-from list.txt
find . -name '*.md' | lychee --files-from -
echo 'README.md' | lychee --files-from -

File Format:
Each line should contain one input (file path, URL, or glob pattern).
Lines starting with '#' are treated as comments and ignored.
Empty lines are also ignored.

-c, --config <CONFIG_FILE>
Configuration file to use

Expand Down
131 changes: 131 additions & 0 deletions lychee-bin/src/files_from.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//! File list reading functionality for --files-from option
//!
//! This module provides the `FilesFrom` struct which handles reading input file
//! lists from any reader, with support for comments and empty line filtering.

use anyhow::{Context, Result};
use std::io::{BufRead, BufReader, Read};
use std::path::Path;

/// Prefix that marks a line of `--files-from` input as a comment.
///
/// A line is skipped when, after trimming surrounding whitespace, it starts
/// with this marker.
const COMMENT_MARKER: &str = "#";

/// Inputs collected from a `--files-from` source (a file or standard input).
///
/// Values are built through `FilesFrom::from_reader` or the `TryFrom<&Path>`
/// conversion; comment lines and blank lines have already been removed by the
/// time `inputs` is populated.
#[derive(Debug, Clone)]
pub(crate) struct FilesFrom {
/// The entries that survived filtering, in the order they were read.
/// Each entry is handed on as a raw input string (file path, URL or glob).
pub(crate) inputs: Vec<String>,
}

impl FilesFrom {
    /// Create `FilesFrom` from any reader.
    ///
    /// The reader is consumed line by line. Blank lines and comment lines are
    /// dropped and the remaining entries are kept in the order they appear.
    ///
    /// # Errors
    ///
    /// Returns an error if any line cannot be read from `reader`.
    pub(crate) fn from_reader<R: Read>(reader: R) -> Result<Self> {
        let lines = BufReader::new(reader)
            .lines()
            .collect::<Result<Vec<_>, _>>()
            .context("Cannot read lines from reader")?;

        Ok(FilesFrom {
            inputs: Self::filter_lines(lines),
        })
    }

    /// Filter out comments and empty lines from input.
    ///
    /// Every line is trimmed of surrounding whitespace. Lines that are empty
    /// after trimming, or that start with `COMMENT_MARKER` after trimming, are
    /// discarded. The surviving entries are returned *trimmed*, so an indented
    /// entry or one with trailing spaces is not passed on with whitespace that
    /// would make it an invalid path or URL.
    fn filter_lines(lines: Vec<String>) -> Vec<String> {
        lines
            .into_iter()
            .filter_map(|line| {
                let entry = line.trim();
                if entry.is_empty() || entry.starts_with(COMMENT_MARKER) {
                    return None;
                }

                // Reuse the existing allocation when there was nothing to trim.
                Some(if entry.len() == line.len() {
                    line
                } else {
                    entry.to_owned()
                })
            })
            .collect()
    }
}

impl TryFrom<&Path> for FilesFrom {
    type Error = anyhow::Error;

    /// Build a `FilesFrom` from a path given on the command line.
    ///
    /// The path `-` selects standard input; any other path is opened as a
    /// regular file. In both cases the contents go through
    /// `FilesFrom::from_reader`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or if reading the lines fails.
    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        // A lone dash means "read the list from stdin" instead of from disk.
        if path == Path::new("-") {
            return Self::from_reader(std::io::stdin());
        }

        std::fs::File::open(path)
            .with_context(|| format!("Cannot open --files-from file: {}", path.display()))
            .and_then(Self::from_reader)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Cursor;
    use tempfile::tempdir;

    /// List contents mixing comments, a blank line and three real entries.
    const SAMPLE_LIST: &str = "# Comment\nfile1.md\n\nfile2.md\n# Another comment\nfile3.md\n";

    /// Entries expected to remain once comments and blank lines are removed.
    const EXPECTED: [&str; 3] = ["file1.md", "file2.md", "file3.md"];

    /// Comments (also indented ones) and blank or whitespace-only lines are dropped.
    #[test]
    fn test_filter_lines() {
        let raw = [
            "file1.md",
            "",
            "# This is a comment",
            "file2.md",
            " ",
            " # Another comment",
            "file3.md",
        ];
        let lines: Vec<String> = raw.iter().map(|entry| (*entry).to_string()).collect();

        let kept = FilesFrom::filter_lines(lines);
        assert_eq!(kept, EXPECTED);
    }

    /// Reading from an in-memory reader yields only the real entries.
    #[test]
    fn test_from_reader() -> Result<()> {
        let parsed = FilesFrom::from_reader(Cursor::new(SAMPLE_LIST))?;
        assert_eq!(parsed.inputs, EXPECTED);

        Ok(())
    }

    /// A list made up solely of comments and blank lines produces no inputs.
    #[test]
    fn test_from_reader_empty() -> Result<()> {
        let only_noise = Cursor::new("# Only comments\n\n# More comments\n \n");

        let parsed = FilesFrom::from_reader(only_noise)?;
        assert!(parsed.inputs.is_empty());

        Ok(())
    }

    /// Converting from a path reads and filters the file on disk.
    #[test]
    fn test_try_from_file() -> Result<()> {
        let scratch = tempdir()?;
        let list_path = scratch.path().join("files.txt");
        fs::write(&list_path, SAMPLE_LIST)?;

        let parsed = FilesFrom::try_from(list_path.as_path())?;
        assert_eq!(parsed.inputs, EXPECTED);

        Ok(())
    }

    /// A missing file is reported with the dedicated context message.
    #[test]
    fn test_try_from_nonexistent_file() {
        let error = FilesFrom::try_from(Path::new("/nonexistent/file.txt"))
            .expect_err("opening a nonexistent file must fail");

        let message = error.to_string();
        assert!(message.contains("Cannot open --files-from file"));
    }
}
1 change: 1 addition & 0 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ use lychee_lib::CookieJar;
mod cache;
mod client;
mod commands;
mod files_from;
mod formatters;
mod options;
mod parse;
Expand Down
36 changes: 34 additions & 2 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::files_from::FilesFrom;
use crate::parse::parse_base;
use crate::verbosity::Verbosity;
use anyhow::{Context, Error, Result, anyhow};
Expand Down Expand Up @@ -312,10 +313,32 @@ pub(crate) struct LycheeOptions {
/// The inputs (where to get links to check from).
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
/// Alternatively, use `--files-from` to read inputs from a file.
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
#[arg(name = "inputs", required = true)]
#[arg(name = "inputs", required_unless_present = "files_from")]
raw_inputs: Vec<String>,

/// Read input filenames from the given file or stdin (if path is '-').
#[arg(
long = "files-from",
value_name = "PATH",
long_help = "Read input filenames from the given file or stdin (if path is '-').

This is useful when you have a large number of inputs that would be
cumbersome to specify on the command line directly.

Examples:
lychee --files-from list.txt
find . -name '*.md' | lychee --files-from -
echo 'README.md' | lychee --files-from -

File Format:
Each line should contain one input (file path, URL, or glob pattern).
Lines starting with '#' are treated as comments and ignored.
Empty lines are also ignored."
)]
files_from: Option<PathBuf>,

/// Configuration file to use
#[arg(short, long = "config")]
#[arg(help = HELP_MSG_CONFIG_FILE)]
Expand All @@ -331,7 +354,16 @@ impl LycheeOptions {
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there, but
// we wouldn't get access to `glob_ignore_case`.
pub(crate) fn inputs(&self) -> Result<HashSet<Input>> {
self.raw_inputs
let mut all_inputs = self.raw_inputs.clone();

// If --files-from is specified, read inputs from the file
if let Some(files_from_path) = &self.files_from {
let files_from = FilesFrom::try_from(files_from_path.as_path())
.context("Cannot read inputs from --files-from")?;
all_inputs.extend(files_from.inputs);
}

all_inputs
.iter()
.map(|raw_input| Input::new(raw_input, None, self.config.glob_ignore_case))
.collect::<Result<_, _>>()
Expand Down
106 changes: 106 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2739,4 +2739,110 @@ mod cli {
.stderr("") // Ensure stderr is empty
.stdout(contains("https://example.com/sitemap.xml"));
}

#[test]
fn test_files_from_file() -> Result<()> {
    // Scratch directory holding one markdown document and a list that names it.
    let workdir = tempfile::tempdir()?;
    let listing = workdir.path().join("files.txt");
    let document = workdir.path().join("test.md");
    let document_str = document.to_string_lossy();

    fs::write(&document, "# Test\n[link](https://example.com)")?;
    fs::write(&listing, document_str.as_bytes())?;

    // The document named in the list must show up among the dumped inputs.
    main_command()
        .arg("--files-from")
        .arg(&listing)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(&*document_str));

    Ok(())
}

#[test]
fn test_files_from_stdin() -> Result<()> {
    let workdir = tempfile::tempdir()?;
    let document = workdir.path().join("test.md");
    let document_str = document.to_string_lossy();

    fs::write(&document, "# Test\n[link](https://example.com)")?;

    // `-` makes `--files-from` read the list from standard input.
    main_command()
        .arg("--files-from")
        .arg("-")
        .arg("--dump-inputs")
        .write_stdin(&*document_str)
        .assert()
        .success()
        .stdout(contains(&*document_str));

    Ok(())
}

#[test]
fn test_files_from_with_comments_and_empty_lines() -> Result<()> {
    let workdir = tempfile::tempdir()?;
    let listing = workdir.path().join("files.txt");
    let document = workdir.path().join("test.md");

    fs::write(&document, "# Test\n[link](https://example.com)")?;

    // Surround the single real entry with comments and a blank line.
    let listing_body = format!(
        "# Comment line\n\n{}\n# Another comment\n",
        document.display()
    );
    fs::write(&listing, listing_body)?;

    let document_str = document.to_string_lossy();
    main_command()
        .arg("--files-from")
        .arg(&listing)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(&*document_str));

    Ok(())
}

#[test]
fn test_files_from_combined_with_regular_inputs() -> Result<()> {
    let workdir = tempfile::tempdir()?;
    let listing = workdir.path().join("files.txt");
    let listed_doc = workdir.path().join("test1.md");
    let positional_doc = workdir.path().join("test2.md");
    let listed_str = listed_doc.to_string_lossy();
    let positional_str = positional_doc.to_string_lossy();

    fs::write(&listed_doc, "# Test 1")?;
    fs::write(&positional_doc, "# Test 2")?;
    fs::write(&listing, listed_str.as_bytes())?;

    // One document comes from the list, the other from a positional argument;
    // both must be present in the dumped inputs.
    main_command()
        .arg("--files-from")
        .arg(&listing)
        .arg(&positional_doc)
        .arg("--dump-inputs")
        .assert()
        .success()
        .stdout(contains(&*listed_str))
        .stdout(contains(&*positional_str));

    Ok(())
}

#[test]
fn test_files_from_nonexistent_file_error() -> Result<()> {
    // Pointing `--files-from` at a missing file must fail with a clear message.
    let missing_list = "/nonexistent/file.txt";

    main_command()
        .arg("--files-from")
        .arg(missing_list)
        .arg("--dump-inputs")
        .assert()
        .failure()
        .stderr(contains("Cannot open --files-from file"));

    Ok(())
}
}
2 changes: 1 addition & 1 deletion lychee-bin/tests/usage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod readme {
use assert_cmd::Command;
use pretty_assertions::assert_eq;

const USAGE_STRING: &str = "Usage: lychee [OPTIONS] <inputs>...\n";
const USAGE_STRING: &str = "Usage: lychee [OPTIONS] [inputs]...\n";

fn main_command() -> Command {
// this gets the "main" binary name (e.g. `lychee`)
Expand Down
Loading