Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
e4239a7
ihii
katrinafyi Aug 31, 2025
3ed59a0
as foreseen, we have big problems with absolute-rooted file urls
katrinafyi Aug 31, 2025
84974ce
blah
katrinafyi Aug 31, 2025
2c10447
secret-lychee-local-base-url
katrinafyi Aug 31, 2025
542d9cd
lazy init
katrinafyi Aug 31, 2025
af11fc6
absolute
katrinafyi Aug 31, 2025
4fd7999
blah
katrinafyi Sep 1, 2025
c288197
blahblahblah. transpose + back-substitution
katrinafyi Sep 1, 2025
5cd3750
fix '..' being remapped too eagerly
katrinafyi Sep 1, 2025
9f1c1e4
stash
katrinafyi Sep 1, 2025
aee0ed1
refactor the thingy into separate mods
katrinafyi Sep 1, 2025
eebc3d4
use base::local for from_directory_path
katrinafyi Sep 1, 2025
e55766f
touching
katrinafyi Sep 1, 2025
1c7af89
TODO: hoist all this source-dependent computation
katrinafyi Sep 1, 2025
1bbc28d
sourcebaseinfo
katrinafyi Sep 6, 2025
439d7f9
clippy fix
katrinafyi Sep 6, 2025
49f84ba
rename
katrinafyi Sep 6, 2025
6c31811
refactor to move more things into SourceBaseInfo. TODO: fix+new tests
katrinafyi Sep 7, 2025
a1249ae
touch
katrinafyi Sep 7, 2025
24e5eb7
fix existing tests
katrinafyi Sep 7, 2025
7ac50fc
no print
katrinafyi Sep 7, 2025
9d1cd42
tweak some tests
katrinafyi Sep 7, 2025
718f462
blah. honestly idk what was happening with these tests in the past...
katrinafyi Sep 7, 2025
db70242
lychee-lib tests PASS
katrinafyi Sep 7, 2025
9c64368
strip_prefix tests and fix
katrinafyi Sep 7, 2025
b1bc0a3
Update ci.yml
katrinafyi Sep 7, 2025
0eb1b51
Update ci.yml
katrinafyi Sep 7, 2025
23d2af7
update help text
katrinafyi Sep 7, 2025
8051840
wire up fallback base url. TODO: TESTS for fallback base url
katrinafyi Sep 7, 2025
2b6650c
fix tests after fallback_base
katrinafyi Sep 7, 2025
d9b1199
remove backwards compat base-url handling
katrinafyi Sep 7, 2025
add09c7
fix collect tests
katrinafyi Sep 7, 2025
ff72bd6
fix fallback_base tests
katrinafyi Sep 7, 2025
126a9b2
fix fallback bug where fallback was applied on top of well-founded
katrinafyi Sep 7, 2025
9888134
propagate root_and_base
katrinafyi Sep 7, 2025
9df0cc3
fmt
katrinafyi Sep 7, 2025
4fb0e08
Display Base
katrinafyi Sep 7, 2025
910fdc8
doc comments ig
katrinafyi Sep 8, 2025
80fc8c7
root_and_base up to main
katrinafyi Sep 7, 2025
235f6d9
Merge remote-tracking branch 'upstream/master' into easy-mode
katrinafyi Oct 4, 2025
d2ceb22
fix main compilation
katrinafyi Oct 4, 2025
d2d3591
fix test compilation too
katrinafyi Oct 4, 2025
15fc377
Merge remote-tracking branch 'upstream/master' into easy-mode
katrinafyi Nov 13, 2025
92103c4
Merge remote-tracking branch 'origin/master' into easy-mode
katrinafyi Nov 18, 2025
0fea5eb
fix compilation
katrinafyi Nov 18, 2025
9f79231
fix tests
katrinafyi Nov 18, 2025
5aac1f1
typo
katrinafyi Nov 18, 2025
2c0798f
Merge remote-tracking branch 'upstream/master' into easy-mode
katrinafyi Dec 10, 2025
aa7c3ba
restore root-dir existence check, but ONLY for relative dirs ;-;
katrinafyi Dec 10, 2025
d84ac6d
always check root-dir
katrinafyi Dec 10, 2025
70fbe33
todo
katrinafyi Dec 10, 2025
be30abc
Merge remote-tracking branch 'upstream/master' into easy-mode
katrinafyi Dec 21, 2025
ab41e77
what is happening T_T filename URLs are so hard. what can you even d…
katrinafyi Dec 21, 2025
36dcee7
do the thing with being super strict about URL filenames
katrinafyi Dec 21, 2025
c2a77b1
Merge remote-tracking branch 'upstream/master' into easy-mode
katrinafyi Jan 23, 2026
15312f4
refactor to separate base info and mappings. neater but harder to use
katrinafyi Jan 23, 2026
c6121a8
touch
katrinafyi Jan 24, 2026
1a14c40
one less clone
katrinafyi Jan 24, 2026
d146124
clean up root_and_base
katrinafyi Jan 24, 2026
5c53ec0
comments
katrinafyi Jan 24, 2026
1437de1
flatten SourceBaseInfo options, and write a lot.
katrinafyi Jan 24, 2026
ae87f97
url_mapping separate
katrinafyi Jan 24, 2026
70b8352
docs
katrinafyi Jan 24, 2026
8638e1a
add hacky root-dir for pre-porting
katrinafyi Jan 24, 2026
c2d7458
more root dir fudging
katrinafyi Jan 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ on:
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUSTFLAGS: -D warnings
RUSTFLAGS: -D warnings

jobs:
typos:
Expand Down Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Run cargo fmt (check if all code is rustfmt-ed)
run: cargo fmt --all --check
- name: Run cargo clippy (deny warnings)
run: cargo clippy --all-targets --all-features -- -D warnings
run: cargo clippy --all-targets --all-features -- -D warnings
- uses: cargo-bins/cargo-binstall@main
- name: Install cargo-msrv
run: cargo binstall --no-confirm --force cargo-msrv
Expand Down
87 changes: 62 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -364,22 +364,28 @@ Options:
[possible values: wayback]

-b, --base-url <BASE_URL>
Base URL to use when resolving relative URLs in local files. If specified,
relative links in local files are interpreted as being relative to the given
base URL.
Remote base URL where the local root-dir will be hosted. If `--base-url` is
specified, `--root-dir` must be specified as well.

For example, given a base URL of `https://example.com/dir/page`, the link `a`
would resolve to `https://example.com/dir/a` and the link `/b` would resolve
to `https://example.com/b`. This behavior is not affected by the filesystem
path of the file containing these links.
When both `--base-url` and `--root-dir` are specified, then links will be resolved
*as if* the local root-dir was hosted at the given base-url.

Note that relative URLs without a leading slash become siblings of the base
URL. If, instead, the base URL ended in a slash, the link would become a child
of the base URL. For example, a base URL of `https://example.com/dir/page/` and
a link of `a` would resolve to `https://example.com/dir/page/a`.
This is done by virtually "splicing" the root-dir onto the base-url path. This
works in both directions: (1) links to subpaths of base-url will be resolved to
local files within root-dir, with consideration to the relative subpath, and
(2) links originating from local files which traverse outside of base-url will
resolve to remote URLs on the internet.

Basically, the base URL option resolves links as if the local files were hosted
at the given base URL address.
The two directions are demonstrated in the examples below. For these examples,
suppose a base URL of `https://example.com/dir/` and root dir of `/tmp/root`.

- (1) A link to `https://example.com/dir/sub/boop.html` will be resolved to
the local file `/tmp/root/sub/boop.html` because it is a subpath of base-url.
The relative subpath of `/sub/boop.html` is mapped into the root-dir.

- (2) A link in `/tmp/root/index.html` to `../up.html` or `/up.html` will be
resolved to the remote URL `https://example.com/up.html` because it traverses
outside of base-url.

The provided base URL value must either be a URL (with scheme) or an absolute path.
Note that certain URL schemes cannot be used as a base, e.g., `data` and `mailto`.
Expand Down Expand Up @@ -472,6 +478,26 @@ Options:
[default: compact]
[possible values: compact, detailed, json, markdown, raw]

--fallback-base-url <FALLBACK_BASE_URL>
Fallback base URL used for inputs where no more suitable base URL applies.
Each input source may have an associated base URL which describes where that
input was located, for the purpose of resolving relative links. Where Lychee
cannot determine a *well-founded* base URL for an input source, this fallback
base URL will be used.

A *well-founded* base URL is one which:
- originates from a remote URL, in which case the base URL is just the remote URL, or
- originates from a local file where `--root-dir` has been specified and the local
file path is a subpath of `--root-dir`.

In all other cases, the base URL is not well-founded and this fallback base URL
applies. In particular, this includes all links passed by stdin and, if `--root-dir`
is unspecified, this includes all links within local files.

Note that this fallback base URL applies without consideration to local file paths.
For local files, it is usually better to specify `--base-url` and `--root-dir`
which will construct a base URL while considering subpaths of `--root-dir`.

--fallback-extensions <FALLBACK_EXTENSIONS>
When checking locally, attempts to locate missing files by trying the given
fallback extensions. Multiple extensions can be separated by commas. Extensions
Expand Down Expand Up @@ -681,18 +707,29 @@ Options:
When HTTPS is available, treat HTTP links as errors

--root-dir <ROOT_DIR>
Root directory to use when checking absolute links in local files. This option is
required if absolute links appear in local files, otherwise those links will be
flagged as errors. This must be an absolute path (i.e., one beginning with `/`).

If specified, absolute links in local files are resolved by prefixing the given
root directory to the requested absolute link. For example, with a root-dir of
`/root/dir`, a link to `/page.html` would be resolved to `/root/dir/page.html`.

This option can be specified alongside `--base-url`. If both are given, an
absolute link is resolved by constructing a URL from three parts: the domain
name specified in `--base-url`, followed by the `--root-dir` directory path,
followed by the absolute link's own path.
Root directory to use when checking local files. This option is required if
absolute links appear in local files, otherwise those links will be flagged as
errors. This must be an absolute path (i.e., one beginning with `/`).

If specified, `--root-dir` acts according to three main rules:

- Links are resolved *as if* the given root-dir was hosted at the root of a
website. For example, with a root-dir of `/tmp`, a link in `/tmp/a/index.html`
to `/page.html` would be resolved to `/tmp/page.html`.

- `--root-dir` only applies to links originating from files which are subpaths
of the given root directory. Other links will be unaffected (e.g., absolute
links from files outside of root-dir will still fail to be found).

- `--root-dir` also serves to limit parent path traversal. With a root-dir of
`/tmp`, a link in `/tmp/index.html` to `../up.html` would be resolved to
`/tmp/up.html` and not `/up.html`. This is because if `/tmp` was uploaded to
a website root, traversing up beyond the root would not change the path.

Additionally, this option can be specified alongside `--base-url`. If both are
given, the behavior is augmented to resolve links as if `--root-dir` was
available at the remote URL of `--base-url`. See the help of `--base-url` for
more information.

-s, --scheme <SCHEME>
Only test links with the given schemes (e.g. https). Omit to check links with
Expand Down
1 change: 1 addition & 0 deletions fixtures/configs/smoketest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ remap = [

# Remote base URL at which the local root directory is hosted, followed by that
# root directory. Setting base_url requires root_dir to be set as well.
base_url = "https://example.com"
root_dir = "."

# HTTP basic auth support. This will be the username and password passed to the
# authorization HTTP header. See
Expand Down
9 changes: 8 additions & 1 deletion lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,14 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
return Ok(exit_code as i32);
}

let mut collector = Collector::new(opts.config.root_dir.clone(), base)?
// Pair the root directory with the (optional) base URL. A base URL is only
// meaningful relative to a root directory, so `base` without `root_dir` is
// rejected.
let root_and_base = match (opts.config.root_dir.clone(), base) {
    (None, None) => None,
    (Some(root_dir), base) => Some((root_dir, base)),
    // clap's `requires_if` only validates command-line arguments.
    // NOTE(review): a base URL supplied some other way (presumably a config
    // file) can still reach this arm, so surface a regular error to the user
    // instead of panicking.
    (None, Some(_base)) => {
        anyhow::bail!("root dir must be specified when base is specified!")
    }
};

let mut collector = Collector::new(root_and_base, opts.config.fallback_base_url.clone())?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
// be aware that "no ignore" means do *not* ignore files
Expand Down
101 changes: 73 additions & 28 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::generate::GenerateMode;
use crate::parse::parse_base;
use crate::verbosity::Verbosity;
use anyhow::{Context, Error, Result, anyhow};
use clap::builder::PossibleValuesParser;
use clap::builder::{ArgPredicate, PossibleValuesParser};
use clap::{Parser, builder::TypedValueParser};
use const_format::{concatcp, formatcp};
use http::{
Expand Down Expand Up @@ -765,32 +765,39 @@ Defaults to '100..=103,200..=299' if the user provides no value."
pub(crate) method: String,

/// Deprecated; use `--base-url` instead
#[arg(long, value_parser = parse_base)]
#[arg(long, value_parser = parse_base, requires_if(ArgPredicate::IsPresent, "root_dir"))]
#[serde(skip)]
pub(crate) base: Option<Base>,

/// Base URL used to resolve relative URLs in local files.
/// Remote base URL where the local root-dir will be uploaded.
/// Example: <https://example.com>
#[arg(
short,
long,
value_parser = parse_base,
long_help = "Base URL to use when resolving relative URLs in local files. If specified,
relative links in local files are interpreted as being relative to the given
base URL.
requires_if(ArgPredicate::IsPresent, "root_dir"),
long_help = "Remote base URL where the local root-dir will be hosted. If `--base-url` is
specified, `--root-dir` must be specified as well.

For example, given a base URL of `https://example.com/dir/page`, the link `a`
would resolve to `https://example.com/dir/a` and the link `/b` would resolve
to `https://example.com/b`. This behavior is not affected by the filesystem
path of the file containing these links.
When both `--base-url` and `--root-dir` are specified, then links will be resolved
*as if* the local root-dir was hosted at the given base-url.

Note that relative URLs without a leading slash become siblings of the base
URL. If, instead, the base URL ended in a slash, the link would become a child
of the base URL. For example, a base URL of `https://example.com/dir/page/` and
a link of `a` would resolve to `https://example.com/dir/page/a`.
This is done by virtually \"splicing\" the root-dir onto the base-url path. This
works in both directions: (1) links to subpaths of base-url will be resolved to
local files within root-dir, with consideration to the relative subpath, and
(2) links originating from local files which traverse outside of base-url will
resolve to remote URLs on the internet.

Basically, the base URL option resolves links as if the local files were hosted
at the given base URL address.
The two directions are demonstrated in the examples below. For these examples,
suppose a base URL of `https://example.com/dir/` and root dir of `/tmp/root`.

- (1) A link to `https://example.com/dir/sub/boop.html` will be resolved to
the local file `/tmp/root/sub/boop.html` because it is a subpath of base-url.
The relative subpath of `/sub/boop.html` is mapped into the root-dir.

- (2) A link in `/tmp/root/index.html` to `../up.html` or `/up.html` will be
resolved to the remote URL `https://example.com/up.html` because it traverses
outside of base-url.

The provided base URL value must either be a URL (with scheme) or an absolute path.
Note that certain URL schemes cannot be used as a base, e.g., `data` and `mailto`."
Expand All @@ -802,22 +809,59 @@ Note that certain URL schemes cannot be used as a base, e.g., `data` and `mailto
/// Must be an absolute path.
#[arg(
long,
long_help = "Root directory to use when checking absolute links in local files. This option is
required if absolute links appear in local files, otherwise those links will be
flagged as errors. This must be an absolute path (i.e., one beginning with `/`).

If specified, absolute links in local files are resolved by prefixing the given
root directory to the requested absolute link. For example, with a root-dir of
`/root/dir`, a link to `/page.html` would be resolved to `/root/dir/page.html`.

This option can be specified alongside `--base-url`. If both are given, an
absolute link is resolved by constructing a URL from three parts: the domain
name specified in `--base-url`, followed by the `--root-dir` directory path,
followed by the absolute link's own path."
long_help = "Root directory to use when checking local files. This option is required if
absolute links appear in local files, otherwise those links will be flagged as
errors. This must be an absolute path (i.e., one beginning with `/`).

If specified, `--root-dir` acts according to three main rules:

- Links are resolved *as if* the given root-dir was hosted at the root of a
website. For example, with a root-dir of `/tmp`, a link in `/tmp/a/index.html`
to `/page.html` would be resolved to `/tmp/page.html`.

- `--root-dir` only applies to links originating from files which are subpaths
of the given root directory. Other links will be unaffected (e.g., absolute
links from files outside of root-dir will still fail to be found).

- `--root-dir` also serves to limit parent path traversal. With a root-dir of
`/tmp`, a link in `/tmp/index.html` to `../up.html` would be resolved to
`/tmp/up.html` and not `/up.html`. This is because if `/tmp` was uploaded to
a website root, traversing up beyond the root would not change the path.

Additionally, this option can be specified alongside `--base-url`. If both are
given, the behavior is augmented to resolve links as if `--root-dir` was
available at the remote URL of `--base-url`. See the help of `--base-url` for
more information."
)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,

/// Fallback base URL used for inputs where no more suitable base URL applies.
#[arg(
long,
value_parser = parse_base,
long_help = "Fallback base URL used for inputs where no more suitable base URL applies.
Each input source may have an associated base URL which describes where that
input was located, for the purpose of resolving relative links. Where Lychee
cannot determine a *well-founded* base URL for an input source, this fallback
base URL will be used.

A *well-founded* base URL is one which:
- originates from a remote URL, in which case the base URL is just the remote URL, or
- originates from a local file where `--root-dir` has been specified and the local
file path is a subpath of `--root-dir`.

In all other cases, the base URL is not well-founded and this fallback base URL
applies. In particular, this includes all links passed by stdin and, if `--root-dir`
is unspecified, this includes all links within local files.

Note that this fallback base URL applies without consideration to local file paths.
For local files, it is usually better to specify `--base-url` and `--root-dir`
which will construct a base URL while considering subpaths of `--root-dir`."
)]
#[serde(default)]
pub(crate) fallback_base_url: Option<Base>,

/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
Expand Down Expand Up @@ -1006,6 +1050,7 @@ impl Config {
exclude_private: false,
extensions: FileType::default_extensions(),
fallback_extensions: Vec::<String>::new(),
fallback_base_url: None,
files_from: None,
format: StatsFormat::default(),
generate: None,
Expand Down
6 changes: 3 additions & 3 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ mod cli {

cargo_bin_cmd!()
.arg("--offline")
.arg("--base-url")
.arg("--root-dir")
.arg(&dir)
.arg(dir.join("index.html"))
.env_clear()
Expand Down Expand Up @@ -443,9 +443,9 @@ mod cli {
cargo_bin_cmd!()
.arg("--offline")
.arg("--root-dir")
.arg("/resolve_paths")
.arg(dir.join("resolve_paths"))
.arg("--base-url")
.arg(&dir)
.arg(dir.join("resolve_paths"))
.arg(dir.join("resolve_paths").join("index.html"))
.env_clear()
.assert()
Expand Down
32 changes: 16 additions & 16 deletions lychee-lib/src/checker/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,22 +115,22 @@ impl FileChecker {
/// Returns the resolved path as a `PathBuf`, or the original path
/// if no base path is defined.
fn resolve_base(&self, path: &Path) -> PathBuf {
// NOTE(review): this span is a diff hunk. The `if let Some(Base::Local(..))`
// body directly below is the pre-change implementation; the commented-out copy
// after it, together with the bare `path.to_path_buf()`, is its replacement.
if let Some(Base::Local(base_path)) = &self.base {
if path.is_absolute() {
let absolute_base_path = if base_path.is_relative() {
std::env::current_dir().unwrap_or_default().join(base_path)
} else {
base_path.clone()
};

let stripped = path.strip_prefix("/").unwrap_or(path);
absolute_base_path.join(stripped)
} else {
base_path.join(path)
}
} else {
path.to_path_buf()
}
// NOTE(review): with the block below commented out, the replacement body
// returns `path` unchanged and no longer reads `self.base`. The doc comment
// above ("or the original path if no base path is defined") still describes
// the old behavior. Update it, and either delete this dead code or restore it
// before merging.
// if let Some(Base::Local(base_path)) = &self.base {
// if path.is_absolute() {
// let absolute_base_path = if base_path.is_relative() {
// std::env::current_dir().unwrap_or_default().join(base_path)
// } else {
// base_path.clone()
// };
//
// let stripped = path.strip_prefix("/").unwrap_or(path);
// absolute_base_path.join(stripped)
// } else {
// base_path.join(path)
// }
// } else {
path.to_path_buf()
// }
}

/// Resolves the given local path by applying logic which is specific to local file
Expand Down
Loading
Loading