|
| 1 | +// Copyright 2017 TODO Group. All rights reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +const { HtmlChecker, BLC_INVALID } = require('broken-link-checker') |
| 5 | +const path = require('path') |
| 6 | +const GitHubMarkup = require('../lib/github_markup') |
| 7 | +const Result = require('../lib/result') |
| 8 | +// eslint-disable-next-line no-unused-vars |
| 9 | +const FileSystem = require('../lib/file_system') |
| 10 | + |
| 11 | +// TODO: how to autoprefix domains with http or https? |
/**
 * Searches for renderable markup documents, renders each one, and then
 * checks for broken links by scanning the rendered HTML.
 *
 * Links the checker reports as broken with reason `BLC_INVALID` are treated
 * as candidate filesystem paths and verified with `fs.relativeFileExists`;
 * every other broken link is reported with its status code or failure reason.
 *
 * Reads from `options`: `globsAll` (patterns handed to `fs.findAllFiles`),
 * `nocase` (coerced to boolean and forwarded to `fs.findAllFiles`) and
 * `succeed-on-non-existent` (coerced to boolean; the overall result when no
 * file matches). The whole `options` object is also handed to `HtmlChecker`.
 *
 * @param {FileSystem} fs A filesystem object configured with filter paths and target directories
 * @param {object} options The rule configuration
 * @returns {Promise<Result>} The lint rule result; rejects if a rendered
 *   document cannot be scanned
 */
async function fileNoBrokenLinks(fs, options) {
  const files = await fs.findAllFiles(options.globsAll, !!options.nocase)

  if (files.length === 0) {
    return new Result(
      'Did not find file matching the specified patterns',
      options.globsAll.map(pattern => ({ passed: false, pattern })),
      !!options['succeed-on-non-existent']
    )
  }

  // for every file check every broken link
  const results = await Promise.all(
    files.map(async f => {
      // render it, if possible
      const absMdPath = path.resolve(fs.targetDir, f)
      const rendered = await GitHubMarkup.renderMarkup(absMdPath)
      if (rendered === null) {
        return {
          passed: true,
          path: f,
          message: 'Ignored due to unknown file format.'
        }
      }

      // scan the rendered HTML, adapting the checker's callback API to a promise
      // that resolves with every link result once the scan completes
      const linkRes = await new Promise((resolve, reject) => {
        const found = []
        // NOTE(review): acceptedSchemes is passed alongside the handlers rather
        // than in the options argument; confirm the checker reads it from here.
        const htmlChecker = new HtmlChecker(options, {
          link: res => found.push(res),
          complete: () => resolve(found),
          acceptedSchemes: ['http', 'https', '']
        })
        if (!htmlChecker.scan(rendered)) {
          reject(new Error(`Unable to scan file ${f}`))
        }
      })

      // keep only broken links, split into invalid URLs (possibly filesystem
      // paths) and links that failed for any other reason
      const brokenLinks = linkRes.filter(({ broken }) => broken)
      const invalid = brokenLinks.filter(
        ({ brokenReason }) => brokenReason === BLC_INVALID
      )
      const failing = brokenLinks.filter(
        ({ brokenReason }) => brokenReason !== BLC_INVALID
      )

      // make the messages for the failing URLs
      const failingMessages = failing.map(
        ({ brokenReason, url: { original }, http }) => {
          // the response may be absent (e.g. no HTTP exchange took place), so
          // read the status code defensively instead of destructuring it
          const response = http ? http.response : null
          const statusCode =
            response && response.statusCode !== undefined
              ? response.statusCode
              : null
          const detail = brokenReason.includes('HTTP')
            ? `status code ${statusCode}`
            : `unknown error ${brokenReason}`
          return `${original} (${detail})`
        }
      )

      // process the invalid links to check if they're actually filesystem paths,
      // producing a message for each one that fails and null for the rest
      const failingInvalidMessagesWithNulls = await Promise.all(
        invalid.map(async ({ url: { original } }) => {
          // verify the path is relative, else the path is invalid
          if (path.posix.isAbsolute(original)) {
            return `${original} (invalid path)`
          }
          // strip any #thing specifiers from the path, since it's too hard to check
          const strippedPath = original.replace(/#(?:[.!/\\\w]*)$/, '')
          if (!strippedPath) return null
          // verify the path doesn't traverse outside the project, else the path is excluded
          // NOTE(review): links are resolved against fs.targetDir, not the
          // directory of the file containing the link; confirm this is intended.
          const targetDir = path.posix.resolve(fs.targetDir)
          const absPath = path.posix.resolve(targetDir, strippedPath)
          const relPath = path.posix.relative(targetDir, absPath)
          // match only a real parent-directory segment so that in-project names
          // which merely begin with two dots are still checked
          if (relPath === '..' || relPath.startsWith('../')) return null
          // verify the file exists (or at least that we have access to it)
          if (!(await fs.relativeFileExists(relPath))) {
            return `${original} (file does not exist)`
          }
          return null
        })
      )
      // remove messages which didn't fail
      const failingInvalidMessages = failingInvalidMessagesWithNulls.filter(
        m => m !== null
      )

      // join all the messages together to form the result
      const allMessages = failingInvalidMessages.concat(failingMessages)
      return {
        passed: allMessages.length === 0,
        path: f,
        message:
          allMessages.length === 0
            ? 'All links are valid'
            : allMessages.join(', ')
      }
    })
  )

  // return the final result
  const passed = results.every(({ passed }) => passed)
  return new Result('', results, passed)
}
| 132 | + |
| 133 | +module.exports = fileNoBrokenLinks |
0 commit comments