Skip to content

Commit

Permalink
detect broken links (#1698)
Browse files Browse the repository at this point in the history
* detect broken links

closes #363
closes #1683

* move links, anchors

* tidy link checking

* edits

* validateLinks

* tweak style

---------

Co-authored-by: Mike Bostock <[email protected]>
  • Loading branch information
Fil and mbostock authored Sep 30, 2024
1 parent 8c61a38 commit ca823ce
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 4 deletions.
28 changes: 28 additions & 0 deletions src/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -365,9 +365,37 @@ export async function build(
}
effects.logger.log("");

// Check links. TODO Have this break the build, and move this check earlier?
const [validLinks, brokenLinks] = validateLinks(outputs);
if (brokenLinks.length) {
effects.logger.warn(`${yellow("Warning: ")}${brokenLinks.length} broken link${brokenLinks.length === 1 ? "" : "s"} (${validLinks.length + brokenLinks.length} validated)`); // prettier-ignore
for (const [path, link] of brokenLinks) effects.logger.log(`${faint("↳")} ${path} ${faint("→")} ${red(link)}`);
} else if (validLinks.length) {
effects.logger.log(`${green(`${validLinks.length}`)} link${validLinks.length === 1 ? "" : "s"} validated`);
}

Telemetry.record({event: "build", step: "finish", pageCount});
}

type Link = [path: string, target: string];

function validateLinks(outputs: Map<string, {resolvers: Resolvers}>): [valid: Link[], broken: Link[]] {
const validTargets = new Set<string>(outputs.keys()); // e.g., "/this/page#hash";
for (const [path, {resolvers}] of outputs) {
for (const anchor of resolvers.anchors) {
validTargets.add(`${path}#${encodeURIComponent(anchor)}`);
}
}
const valid: Link[] = [];
const broken: Link[] = [];
for (const [path, {resolvers}] of outputs) {
for (const target of resolvers.localLinks) {
(validTargets.has(target) ? valid : broken).push([path, target]);
}
}
return [valid, broken];
}

function applyHash(path: string, hash: string): string {
const ext = extname(path);
let name = basename(path, ext);
Expand Down
21 changes: 19 additions & 2 deletions src/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import he from "he";
import hljs from "highlight.js";
import type {DOMWindow} from "jsdom";
import {JSDOM, VirtualConsole} from "jsdom";
import {isAssetPath, relativePath, resolveLocalPath} from "./path.js";
import {isAssetPath, parseRelativeUrl, relativePath, resolveLocalPath, resolvePath} from "./path.js";

const ASSET_ATTRIBUTES: readonly [selector: string, src: string][] = [
["a[href][download]", "href"],
Expand Down Expand Up @@ -41,6 +41,8 @@ export function parseHtml(html: string): DOMWindow {

interface Assets {
files: Set<string>;
anchors: Set<string>;
localLinks: Set<string>;
localImports: Set<string>;
globalImports: Set<string>;
staticImports: Set<string>;
Expand All @@ -49,6 +51,8 @@ interface Assets {
export function findAssets(html: string, path: string): Assets {
const {document} = parseHtml(html);
const files = new Set<string>();
const anchors = new Set<string>();
const localLinks = new Set<string>();
const localImports = new Set<string>();
const globalImports = new Set<string>();
const staticImports = new Set<string>();
Expand Down Expand Up @@ -99,7 +103,20 @@ export function findAssets(html: string, path: string): Assets {
}
}

return {files, localImports, globalImports, staticImports};
for (const element of document.querySelectorAll<HTMLElement>("[id],[name]")) {
if (isExternal(element)) continue;
anchors.add(element.getAttribute("id") ?? element.getAttribute("name")!);
}

for (const a of document.querySelectorAll<HTMLAnchorElement>("a[href]")) {
if (isExternal(a) || a.hasAttribute("download")) continue;
const href = a.getAttribute("href")!;
if (/^\w+:/.test(href)) continue; // URL
const {pathname, search, hash} = parseRelativeUrl(href);
localLinks.add(resolvePath(path, pathname).replace(/\.html$/i, "").replace(/\/$/, "/index") + search + hash); // prettier-ignore
}

return {files, localImports, globalImports, staticImports, localLinks, anchors};
}

export function rewriteHtmlPaths(html: string, path: string): string {
Expand Down
2 changes: 1 addition & 1 deletion src/path.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export function resolvePath(root: string, source: string, target: string): strin
export function resolvePath(root: string, source: string, target?: string): string {
if (target === undefined) (target = source), (source = root), (root = ".");
const path = join(root, target === "" ? source : target.startsWith("/") ? "." : dirname(source), target);
return path.startsWith("../") ? path : `/${path}`;
return path.startsWith("../") ? path : join("/", path);
}

/**
Expand Down
12 changes: 11 additions & 1 deletion src/resolvers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ export interface Resolvers {
hash: string;
assets: Set<string>; // like files, but not registered for FileAttachment
files: Set<string>;
anchors: Set<string>;
localLinks: Set<string>;
localImports: Set<string>;
globalImports: Set<string>;
staticImports: Set<string>;
Expand Down Expand Up @@ -88,6 +90,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
const assets = new Set<string>();
const files = new Set<string>();
const fileMethods = new Set<string>();
const anchors = new Set<string>();
const localLinks = new Set<string>();
const localImports = new Set<string>();
const globalImports = new Set<string>(defaultImports);
const staticImports = new Set<string>(defaultImports);
Expand All @@ -98,6 +102,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
if (!html) continue;
const info = findAssets(html, path);
for (const f of info.files) assets.add(f);
for (const a of info.anchors) anchors.add(a);
for (const l of info.localLinks) localLinks.add(l);
for (const i of info.localImports) localImports.add(i);
for (const i of info.globalImports) globalImports.add(i);
for (const i of info.staticImports) staticImports.add(i);
Expand Down Expand Up @@ -151,6 +157,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
path,
hash: hash.digest("hex"),
assets,
anchors,
localLinks,
...(await resolveResolvers(
{
files,
Expand All @@ -172,6 +180,8 @@ export async function getModuleResolvers(path: string, config: Omit<ResolversCon
path,
hash: getModuleHash(root, path),
assets: new Set(),
anchors: new Set(),
localLinks: new Set(),
...(await resolveResolvers({localImports: [path], staticImports: [path]}, {path, ...config}))
};
}
Expand All @@ -193,7 +203,7 @@ async function resolveResolvers(
stylesheets?: Iterable<string> | null;
},
{root, path, normalizePath, loaders}: ResolversConfig
): Promise<Omit<Resolvers, "path" | "hash" | "assets">> {
): Promise<Omit<Resolvers, "path" | "hash" | "assets" | "anchors" | "localLinks">> {
const files = new Set<string>(initialFiles);
const fileMethods = new Set<string>(initialFileMethods);
const localImports = new Set<string>(initialLocalImports);
Expand Down
20 changes: 20 additions & 0 deletions test/html-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,26 @@ describe("findAssets(html, path)", () => {
const html = '<script src="test.js" type="other">';
assert.deepStrictEqual(findAssets(html, "foo").files, new Set(["./test.js"]));
});
it("finds anchors by [id] or [name]", () => {
const html = '<a id="id1">foo</a> <a name="id2">bar</a>';
assert.deepStrictEqual(findAssets(html, "foo").anchors, new Set(["id1", "id2"]));
});
it("finds local links by a[href]", () => {
const html = '<a href="#anchor">a</a> <a href="other#baz">b</a> <a href="?test">self</a>';
assert.deepStrictEqual(findAssets(html, "foo").localLinks, new Set(["/foo#anchor", "/other#baz", "/foo?test"]));
});
it("finds relative links", () => {
const html = '<a href="./test">a</a>';
assert.deepStrictEqual(findAssets(html, "foo/bar").localLinks, new Set(["/foo/test"]));
});
it("finds links that go up", () => {
const html = '<a href="../test">a</a>';
assert.deepStrictEqual(findAssets(html, "foo/bar").localLinks, new Set(["/test"]));
});
it("finds links that go above the root", () => {
const html = '<a href="../test">a</a>';
assert.deepStrictEqual(findAssets(html, "foo").localLinks, new Set(["../test"]));
});
});

describe("rewriteHtml(html, resolve)", () => {
Expand Down
3 changes: 3 additions & 0 deletions test/path-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ import {isPathImport, parseRelativeUrl, relativePath, resolveLocalPath, resolveP
describe("resolvePath(source, target)", () => {
it("returns the path to the specified target within the source root", () => {
assert.strictEqual(resolvePath("foo", "baz"), "/baz");
assert.strictEqual(resolvePath("foo", "./"), "/");
assert.strictEqual(resolvePath("./foo", "./baz"), "/baz");
assert.strictEqual(resolvePath("/foo", "baz"), "/baz");
assert.strictEqual(resolvePath("/foo", "./baz"), "/baz");
assert.strictEqual(resolvePath("foo/bar", "baz"), "/foo/baz");
assert.strictEqual(resolvePath("foo/bar", "./"), "/foo/");
assert.strictEqual(resolvePath("./foo/bar", "./baz"), "/foo/baz");
assert.strictEqual(resolvePath("/foo/bar", "baz"), "/foo/baz");
assert.strictEqual(resolvePath("/foo/bar", "./baz"), "/foo/baz");
assert.strictEqual(resolvePath("/foo/bar", "../"), "/");
});
it("allows paths outside the root", () => {
assert.strictEqual(resolvePath("foo", "../baz"), "../baz");
Expand Down

0 comments on commit ca823ce

Please sign in to comment.