Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9e10dee
Refactor input dumping and path retrieval with extension filtering
mre May 19, 2025
f4361c0
lints
mre May 19, 2025
bbdf099
cleanup old comment
mre May 23, 2025
7033b1f
shorten comment
mre May 23, 2025
cfaea7a
Remove special handling for stdin and strings
mre May 23, 2025
727722f
clean up params in docs
mre May 23, 2025
0f01c5a
Add comment
mre May 23, 2025
df9d213
Improve dump-inputs test
mre May 23, 2025
4008f5c
refactor
mre Jun 12, 2025
3fc5ecf
Refactor dump and dump_inputs commands to simplify path exclusion han…
mre Jul 25, 2025
ea76aaf
Refactor Input handling to yield InputSource types instead of PathBuf…
mre Jul 27, 2025
f6fc264
Rename get_file_paths to get_input_sources for clarity and update doc…
mre Jul 27, 2025
09ae079
Cleanup/lint
mre Jul 27, 2025
e67cedb
Improve documentation and performance in dump_inputs function
mre Aug 14, 2025
3d56098
fix wording
mre Aug 14, 2025
b74ea2c
Refactor function signatures to use lifetimes
mre Aug 14, 2025
4be0715
Rebase
mre Aug 14, 2025
99ee4df
Add unreachable! macro for unexpected FsGlob case in Input implementa…
mre Aug 14, 2025
47be49f
Fix dump inputs output on overlapping glob patterns
mre Aug 17, 2025
ff41204
Clean up path handling
mre Aug 17, 2025
39ee003
Be more specific about output of `--dump inputs <input> --hidden`
mre Aug 17, 2025
33f6b0d
Be more specific about output when dumping individual files
mre Aug 17, 2025
60a47b2
change var name from inp to input
mre Aug 17, 2025
3e6be0d
clean up `collect_sources_with_file_types`
mre Aug 17, 2025
0291304
Turns out we don't need `tokio::pin!`. Simplify stream handling
mre Aug 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 1 addition & 145 deletions lychee-bin/src/commands/dump.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,15 @@
use log::error;
use lychee_lib::Request;
use lychee_lib::Result;
use lychee_lib::filter::PathExcludes;
use std::fs;
use std::io::{self, Write};
use std::path::PathBuf;
use tokio_stream::StreamExt;

use crate::ExitCode;
use crate::verbosity::Verbosity;

use super::CommandParams;

/// Builds the sink that dump output is written to.
///
/// With `Some(path)` the file is opened in append mode, so anything already
/// in the file is kept and new lines are added after it. The file is created
/// when it does not exist yet, so the helper works even if the caller has not
/// created it beforehand. With `None` a locked handle to `stdout` is returned.
///
/// # Errors
///
/// Returns an error if the output file can neither be opened nor created.
fn create_writer(output: Option<PathBuf>) -> Result<Box<dyn Write>> {
    let writer: Box<dyn Write> = match output {
        Some(path) => Box::new(
            fs::OpenOptions::new()
                .append(true)
                // Without this, opening a missing file fails with `NotFound`.
                .create(true)
                .open(path)?,
        ),
        None => Box::new(io::stdout().lock()),
    };
    Ok(writer)
}

/// Dump all detected links to stdout without checking them
pub(crate) async fn dump<S>(params: CommandParams<S>) -> Result<ExitCode>
where
Expand All @@ -42,7 +22,7 @@ where
fs::File::create(out_file)?;
}

let mut writer = create_writer(params.cfg.output)?;
let mut writer = super::create_writer(params.cfg.output)?;

while let Some(request) = requests.next().await {
let mut request = request?;
Expand Down Expand Up @@ -71,36 +51,6 @@ where
Ok(ExitCode::Success)
}

/// Writes every input source yielded by `sources` on its own line, leaving
/// out those matched by `excluded_paths`. No links are extracted or checked.
///
/// Output goes to the file at `output` when one is given (the file is
/// created or truncated first), otherwise to stdout. The first error coming
/// from the stream or from a write aborts the dump and is returned.
pub(crate) async fn dump_inputs<S>(
    sources: S,
    output: Option<&PathBuf>,
    excluded_paths: &PathExcludes,
) -> Result<ExitCode>
where
    S: futures::Stream<Item = Result<String>>,
{
    // Start from an empty file: `create_writer` opens it in append mode.
    if let Some(path) = output {
        fs::File::create(path)?;
    }
    let mut sink = create_writer(output.cloned())?;

    tokio::pin!(sources);
    while let Some(item) = sources.next().await {
        let entry = item?;
        if !excluded_paths.is_match(&entry) {
            writeln!(sink, "{entry}")?;
        }
    }

    Ok(ExitCode::Success)
}

/// Dump request to stdout
fn write(
writer: &mut Box<dyn Write>,
Expand Down Expand Up @@ -134,97 +84,3 @@ fn write(
/// Emits `out_str` followed by a single newline on `writer`.
fn write_out(writer: &mut Box<dyn Write>, out_str: &str) -> io::Result<()> {
    writer.write_fmt(format_args!("{out_str}\n"))
}

#[cfg(test)]
mod tests {
    use super::*;
    use futures::stream;
    use tempfile::NamedTempFile;

    /// Three sources and no exclusions: each one lands on its own line, in order.
    #[tokio::test]
    async fn test_dump_inputs_basic() -> Result<()> {
        let out_file = NamedTempFile::new()?;
        let out_path = out_file.path().to_path_buf();

        let sources = stream::iter(vec![
            Ok("test/path1".to_string()),
            Ok("test/path2".to_string()),
            Ok("test/path3".to_string()),
        ]);

        let no_excludes = PathExcludes::empty();
        let exit_code = dump_inputs(sources, Some(&out_path), &no_excludes).await?;
        assert_eq!(exit_code, ExitCode::Success);

        let written = fs::read_to_string(&out_path)?;
        assert_eq!(written, "test/path1\ntest/path2\ntest/path3\n");
        Ok(())
    }

    /// A source matching the exclusion pattern is left out of the output.
    #[tokio::test]
    async fn test_dump_inputs_with_excluded_paths() -> Result<()> {
        let out_file = NamedTempFile::new()?;
        let out_path = out_file.path().to_path_buf();

        let sources = stream::iter(vec![
            Ok("test/path1".to_string()),
            Ok("excluded/path".to_string()),
            Ok("test/path2".to_string()),
        ]);

        let excludes = PathExcludes::new(["excluded"]).unwrap();
        let exit_code = dump_inputs(sources, Some(&out_path), &excludes).await?;
        assert_eq!(exit_code, ExitCode::Success);

        let written = fs::read_to_string(&out_path)?;
        assert_eq!(written, "test/path1\ntest/path2\n");
        Ok(())
    }

    /// An empty stream succeeds and leaves the output file empty.
    #[tokio::test]
    async fn test_dump_inputs_empty_stream() -> Result<()> {
        let out_file = NamedTempFile::new()?;
        let out_path = out_file.path().to_path_buf();

        let nothing: Vec<Result<String>> = Vec::new();
        let no_excludes = PathExcludes::empty();
        let exit_code = dump_inputs(stream::iter(nothing), Some(&out_path), &no_excludes).await?;
        assert_eq!(exit_code, ExitCode::Success);

        let written = fs::read_to_string(&out_path)?;
        assert_eq!(written, "");
        Ok(())
    }

    /// An `Err` item in the stream makes the whole dump return an error.
    #[tokio::test]
    async fn test_dump_inputs_error_in_stream() -> Result<()> {
        let out_file = NamedTempFile::new()?;
        let out_path = out_file.path().to_path_buf();

        let items: Vec<Result<String>> = vec![
            Ok("test/path1".to_string()),
            Err(io::Error::other("test error").into()),
            Ok("test/path2".to_string()),
        ];

        let no_excludes = PathExcludes::empty();
        let outcome = dump_inputs(stream::iter(items), Some(&out_path), &no_excludes).await;
        assert!(outcome.is_err());
        Ok(())
    }

    /// Without an output path the dump goes to stdout and still succeeds.
    #[tokio::test]
    async fn test_dump_inputs_to_stdout() -> Result<()> {
        let sources = stream::iter(vec![Ok("test/path1".to_string())]);

        let no_excludes = PathExcludes::empty();
        let exit_code = dump_inputs(sources, None, &no_excludes).await?;
        assert_eq!(exit_code, ExitCode::Success);
        Ok(())
    }
}
58 changes: 58 additions & 0 deletions lychee-bin/src/commands/dump_inputs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use lychee_lib::{FileExtensions, Input, Result};
use std::collections::HashSet;
use std::fs;
use std::io::{self, Write};
use std::path::PathBuf;
use tokio_stream::StreamExt;

use crate::ExitCode;

/// Lists every resolved input source, one per line, without extracting or
/// checking any links.
///
/// Each entry of `inputs` is expanded through `Input::get_sources`, which is
/// handed the extension filter, the hidden/gitignore flags and the path
/// exclusions built from `excluded_paths`. A source that is produced more
/// than once is printed only the first time it appears.
///
/// Output goes to the file at `output` when one is given (the file is
/// created or truncated first), otherwise to stdout.
///
/// # Errors
///
/// Returns an error if the output file cannot be created or written to, if
/// `PathExcludes::new` rejects `excluded_paths`, or if a source stream yields
/// an error.
//
// NOTE(review): the call site visible in `main.rs` passes
// `opts.config.no_ignore` as `skip_gitignored`, while the `Collector` there is
// configured with `skip_ignored(!opts.config.no_ignore)`. The polarity looks
// inverted at the call site -- confirm.
pub(crate) async fn dump_inputs(
    inputs: HashSet<Input>,
    output: Option<&PathBuf>,
    excluded_paths: &[String],
    valid_extensions: &FileExtensions,
    skip_hidden: bool,
    skip_gitignored: bool,
) -> Result<ExitCode> {
    // Start from an empty file: the writer below opens it in append mode.
    if let Some(path) = output {
        fs::File::create(path)?;
    }
    let mut sink = super::create_writer(output.cloned())?;

    // Built once and shared by every input below.
    let path_excludes = lychee_lib::filter::PathExcludes::new(excluded_paths)?;

    // Sources that have already been printed.
    let mut printed = HashSet::new();

    for input in inputs {
        let stream = input.get_sources(
            valid_extensions.clone(),
            skip_hidden,
            skip_gitignored,
            &path_excludes,
        );
        tokio::pin!(stream);

        while let Some(next) = stream.next().await {
            let source = next?;
            // `insert` returns `false` for a source that was recorded earlier.
            let first_time = printed.insert(source.clone());
            if first_time {
                write_out(&mut sink, &source)?;
            }
        }
    }

    Ok(ExitCode::Success)
}

/// Writes `out_str` to `writer`, terminated by a newline.
fn write_out(writer: &mut Box<dyn Write>, out_str: &str) -> io::Result<()> {
    writer.write_fmt(format_args!("{out_str}\n"))
}
18 changes: 17 additions & 1 deletion lychee-bin/src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
pub(crate) mod check;
pub(crate) mod dump;
pub(crate) mod dump_inputs;

pub(crate) use check::check;
pub(crate) use dump::dump;
pub(crate) use dump::dump_inputs;
pub(crate) use dump_inputs::dump_inputs;

use std::fs;
use std::io::{self, Write};
use std::path::PathBuf;
use std::sync::Arc;

use crate::cache::Cache;
Expand All @@ -19,3 +23,15 @@ pub(crate) struct CommandParams<S: futures::Stream<Item = Result<Request>>> {
pub(crate) requests: S,
pub(crate) cfg: Config,
}

/// Creates a writer that outputs to a file or stdout.
///
/// A file target is opened in append mode, so existing content is preserved
/// and new output is added after it; callers that want a fresh file truncate
/// it before calling this function. The file is created when it does not
/// exist yet, so the function also works without that prior step.
/// When `output` is `None`, a locked handle to stdout is returned.
///
/// # Errors
///
/// Returns an error if the output file can neither be opened nor created.
fn create_writer(output: Option<PathBuf>) -> Result<Box<dyn Write>> {
    let Some(path) = output else {
        return Ok(Box::new(io::stdout().lock()));
    };

    let file = fs::OpenOptions::new()
        .append(true)
        // Without this, opening a missing file fails with `NotFound`.
        .create(true)
        .open(path)?;
    Ok(Box::new(file))
}
27 changes: 14 additions & 13 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,28 +316,29 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
}
};

let mut collector = Collector::new(opts.config.root_dir.clone(), base)?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
.include_verbatim(opts.config.include_verbatim)
.headers(HeaderMap::from_header_pairs(&opts.config.header)?)
.excluded_paths(PathExcludes::new(opts.config.exclude_path.clone())?)
// File a bug if you rely on this envvar! It's going to go away eventually.
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").is_ok_and(|x| x == "1"));

if opts.config.dump_inputs {
let sources = collector.collect_sources(inputs);
let exit_code = commands::dump_inputs(
sources,
inputs,
opts.config.output.as_ref(),
&PathExcludes::new(&opts.config.exclude_path)?,
&opts.config.exclude_path,
&opts.config.extensions,
!opts.config.hidden,
opts.config.no_ignore,
)
.await?;

return Ok(exit_code as i32);
}

let mut collector = Collector::new(opts.config.root_dir.clone(), base)?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
.include_verbatim(opts.config.include_verbatim)
.headers(HeaderMap::from_header_pairs(&opts.config.header)?)
.excluded_paths(PathExcludes::new(opts.config.exclude_path.clone())?)
// File a bug if you rely on this envvar! It's going to go away eventually.
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").is_ok_and(|x| x == "1"));
collector = if let Some(ref basic_auth) = opts.config.basic_auth {
collector.basic_auth_extractor(BasicAuthExtractor::new(basic_auth)?)
} else {
Expand Down
Loading
Loading