Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ Options:
--dump
Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked

--dump-inputs
Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected

--archive <ARCHIVE>
Specify the use of a specific web archive. Can be used in combination with `--suggest`

Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
28 changes: 26 additions & 2 deletions lychee-bin/src/commands/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ where
let requests = params.requests;
tokio::pin!(requests);

if let Some(outfile) = &params.cfg.output {
fs::File::create(outfile)?;
if let Some(out_file) = &params.cfg.output {
fs::File::create(out_file)?;
}

let mut writer = create_writer(params.cfg.output)?;
Expand Down Expand Up @@ -70,6 +70,30 @@ where
Ok(ExitCode::Success)
}

/// Dump all input sources without extracting or checking any links.
///
/// Every source yielded by `sources` is written on its own line. When
/// `output` is `Some`, that file is created (or truncated) first and the path
/// is handed to `create_writer`; otherwise `create_writer` is called with
/// `None` (presumably this selects stdout — confirm against `create_writer`).
///
/// # Errors
///
/// Returns an error if the output file cannot be created, if the writer
/// cannot be set up, if the stream yields an error, or if writing a line
/// fails. The first error aborts the dump.
pub(crate) async fn dump_inputs<S>(sources: S, output: Option<&PathBuf>) -> Result<ExitCode>
where
    S: futures::Stream<Item = Result<String>>,
{
    // Pin the stream on the stack so that it can be polled with `next()`.
    tokio::pin!(sources);

    // Create (or truncate) the output file up front so it exists even when
    // there are no sources to report.
    if let Some(out_file) = output {
        fs::File::create(out_file)?;
    }

    let mut writer = create_writer(output.cloned())?;

    while let Some(source) = sources.next().await {
        let source = source?;

        writeln!(writer, "{source}")?;
    }

    Ok(ExitCode::Success)
}

/// Dump request to stdout
fn write(
writer: &mut Box<dyn Write>,
Expand Down
1 change: 1 addition & 0 deletions lychee-bin/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub(crate) mod dump;

pub(crate) use check::check;
pub(crate) use dump::dump;
pub(crate) use dump::dump_inputs;

use std::sync::Arc;

Expand Down
7 changes: 7 additions & 0 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,13 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
// File a bug if you rely on this envvar! It's going to go away eventually.
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").map_or(false, |x| x == "1"));

if opts.config.dump_inputs {
let sources = collector.collect_sources(inputs).await;
let exit_code = commands::dump_inputs(sources, opts.config.output.as_ref()).await?;

return Ok(exit_code as i32);
}

collector = if let Some(ref basic_auth) = opts.config.basic_auth {
collector.basic_auth_extractor(BasicAuthExtractor::new(basic_auth)?)
} else {
Expand Down
6 changes: 6 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) dump: bool,

/// Don't perform any link extraction and checking.
/// Instead, dump all input sources from which links would be collected
#[arg(long)]
#[serde(default)]
pub(crate) dump_inputs: bool,

/// Specify the use of a specific web archive.
/// Can be used in combination with `--suggest`
#[arg(long, value_parser = clap::builder::PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
Expand Down
70 changes: 70 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1347,4 +1347,74 @@ mod cli {

Ok(())
}

#[test]
fn test_dump_inputs_glob_md() -> Result<()> {
    // A Markdown-only glob below the fixtures directory must list these files.
    let md_glob = fixtures_path().join("**/*.md");
    let expected_sources = [
        "fixtures/dump_inputs/subfolder/file2.md",
        "fixtures/dump_inputs/markdown.md",
    ];

    let mut cmd = main_command();
    let mut assertion = cmd.arg("--dump-inputs").arg(md_glob).assert().success();
    for expected in expected_sources {
        assertion = assertion.stdout(contains(expected));
    }

    Ok(())
}

#[test]
fn test_dump_inputs_glob_all() -> Result<()> {
    // A catch-all glob must list files of every type as well as the
    // subfolder entry itself.
    let catch_all_glob = fixtures_path().join("**/*");
    let expected_sources = [
        "fixtures/dump_inputs/subfolder/test.html",
        "fixtures/dump_inputs/subfolder/file2.md",
        "fixtures/dump_inputs/subfolder",
        "fixtures/dump_inputs/markdown.md",
        "fixtures/dump_inputs/subfolder/example.bin",
        "fixtures/dump_inputs/some_file.txt",
    ];

    let mut cmd = main_command();
    let mut assertion = cmd
        .arg("--dump-inputs")
        .arg(catch_all_glob)
        .assert()
        .success();
    for expected in expected_sources {
        assertion = assertion.stdout(contains(expected));
    }

    Ok(())
}

#[test]
fn test_dump_inputs_url() -> Result<()> {
    // A remote URL given as input must show up in the dumped sources.
    let url = "https://example.com";

    let mut cmd = main_command();
    cmd.args(["--dump-inputs", url]);
    cmd.assert().success().stdout(contains(url));

    Ok(())
}

#[test]
fn test_dump_inputs_path() -> Result<()> {
    // A plain relative path given as input must show up in the dumped sources.
    let path = "fixtures";

    let mut cmd = main_command();
    cmd.args(["--dump-inputs", path]);
    cmd.assert().success().stdout(contains(path));

    Ok(())
}

#[test]
fn test_dump_inputs_stdin() -> Result<()> {
    // Passing `-` as the input must be reported with the `Stdin` label.
    let stdin_marker = "-";
    let expected_label = "Stdin";

    let mut cmd = main_command();
    cmd.args(["--dump-inputs", stdin_marker]);
    cmd.assert().success().stdout(contains(expected_label));

    Ok(())
}
}
8 changes: 8 additions & 0 deletions lychee-lib/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ impl Collector {
self
}

/// Gather the sources of every given [`Input`] into a single stream.
///
/// Each input is expanded through
/// [`Input::get_sources`](crate::Input#method.get_sources) and the resulting
/// per-input streams are flattened into one stream of source strings.
/// NOTE(review): as the `par_then_unordered` name suggests, the order of the
/// yielded sources presumably does not follow the order of `inputs` — confirm.
pub async fn collect_sources(self, inputs: Vec<Input>) -> impl Stream<Item = Result<String>> {
    let input_stream = stream::iter(inputs);

    // One stream of sources per input.
    let per_input_sources = input_stream
        .par_then_unordered(None, move |input| async move { input.get_sources().await });

    per_input_sources.flatten()
}

/// Fetch all unique links from inputs
/// All relative URLs get prefixed with `base` (if given).
/// (This can be a directory or a base URL)
Expand Down
39 changes: 37 additions & 2 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,41 @@ impl Input {
}
}

/// Retrieve all sources from this input. The output depends on the type of
/// input:
///
/// - Remote URLs are returned as is, in their full form
/// - Filepath Glob Patterns are tilde-expanded and each matched entry is
///   returned
/// - Absolute or relative filepaths are returned as is
/// - Standard input and raw string inputs have no path or URL of their own;
///   they are reported with the placeholder labels `Stdin` and `Raw String`
///
/// Paths are converted lossily, so any non-UTF-8 parts are replaced rather
/// than causing an error.
///
/// # Errors
///
/// The returned stream yields an error if globbing fails with the expanded
/// pattern. Errors for individual glob entries are printed to stderr and the
/// entry is skipped.
pub async fn get_sources(self) -> impl Stream<Item = Result<String>> {
    try_stream! {
        match self.source {
            InputSource::RemoteUrl(url) => yield url.to_string(),
            InputSource::FsGlob { pattern, ignore_case } => {
                let glob_expanded = tilde(&pattern).to_string();
                let mut match_opts = glob::MatchOptions::new();

                match_opts.case_sensitive = !ignore_case;

                for entry in glob_with(&glob_expanded, match_opts)? {
                    match entry {
                        Ok(path) => yield path.to_string_lossy().into_owned(),
                        // Best effort: report the failing entry and carry on
                        // with the remaining matches.
                        Err(e) => eprintln!("{e:?}"),
                    }
                }
            },
            InputSource::FsPath(path) => yield path.to_string_lossy().into_owned(),
            InputSource::Stdin => yield "Stdin".into(),
            InputSource::String(_) => yield "Raw String".into(),
        }
    }
}

async fn url_contents(url: &Url) -> Result<InputContent> {
// Assume HTML for default paths
let file_type = if url.path().is_empty() || url.path() == "/" {
Expand All @@ -282,10 +317,10 @@ impl Input {

async fn glob_contents(
&self,
path_glob: &str,
pattern: &str,
ignore_case: bool,
) -> impl Stream<Item = Result<InputContent>> + '_ {
let glob_expanded = tilde(&path_glob).to_string();
let glob_expanded = tilde(&pattern).to_string();
let mut match_opts = glob::MatchOptions::new();

match_opts.case_sensitive = !ignore_case;
Expand Down