Skip to content

Commit 9e8bb98

Browse files
feat: WIP adding a broken link checker rule
1 parent 8d21449 commit 9e8bb98

19 files changed

+595
-0
lines changed

lib/github_markup.js

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2018 TODO Group. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
const { commandExists } = require('./command_exists')
5+
const spawnSync = require('child_process').spawnSync
6+
7+
class GitHubMarkup {
  /**
   * Renders a markup document (such as a README) by running the bundled
   * `github_markup_check_and_render` script on it.
   *
   * @param {string} targetFile The file to render
   * @returns {Promise<string|null>} The script's standard output when it exits
   * with status 0, or null for any other exit status
   * @throws {Error} 'GitHub markup not installed' when `commandExists` reports
   * that the `github-markup` command is unavailable
   */
  async renderMarkup(targetFile) {
    // TODO: windows?
    const resolvedCommand = await commandExists(['github-markup'])
    if (resolvedCommand === null) {
      throw new Error('GitHub markup not installed')
    }

    // The helper script lives next to this module.
    const scriptPath = `${__dirname}/github_markup_check_and_render`
    const { status, stdout } = spawnSync(scriptPath, [targetFile])

    // Any non-zero (or missing) exit status means the file was not rendered.
    return status === 0 ? stdout.toString() : null
  }
}
33+
34+
module.exports = new GitHubMarkup()

lib/github_markup_check_and_render

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env ruby
# Modified github_markup utility which checks first that the file can be rendered before rendering.
#
# Usage: github_markup_check_and_render FILE
#
# Prints the rendered document on stdout and exits 0 when GitHub::Markup can
# render FILE. Exits 1 when no FILE is given, when FILE cannot be read, or when
# GitHub::Markup reports that it cannot render FILE.

$LOAD_PATH.unshift File.dirname(File.realpath(__FILE__)) + "/../lib"
require 'github/markup'

if ARGV.size < 1
  print "usage: #{File.basename($0)} FILE\n"
  exit 1
end

name = ARGV.first
file_contents = nil

begin
  # Block form closes the handle even when the read itself raises.
  file_contents = File.open(name, "r") { |file| file.read }
rescue StandardError => e
  # StandardError (not Exception) so that SystemExit and Interrupt are not swallowed.
  $stderr.print "error: #{e.message}\n"
  exit 1
end

if GitHub::Markup.can_render?(name, file_contents)
  print GitHub::Markup.render(name, file_contents)
  exit 0
else
  print "File '#{name}' cannot be rendered.\n"
  exit 1
end

rules/file-no-broken-links.js

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// Copyright 2017 TODO Group. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
const { HtmlChecker, BLC_INVALID } = require('broken-link-checker')
5+
const path = require('path')
6+
const GitHubMarkup = require('../lib/github_markup')
7+
const Result = require('../lib/result')
8+
// eslint-disable-next-line no-unused-vars
9+
const FileSystem = require('../lib/file_system')
10+
11+
// TODO: how to autoprefix domains with http or https?
12+
/**
 * Searches for a renderable markup document, renders it, and then
 * checks for broken links by scanning the html.
 *
 * Every file matched by `options.globsAll` is rendered through GitHubMarkup.
 * Files that cannot be rendered pass with an "ignored" message. For the rest,
 * links reported as broken are split in two groups: links flagged BLC_INVALID
 * are re-checked as relative filesystem paths, all others are reported with
 * their HTTP status code or error reason.
 *
 * @param {FileSystem} fs A filesystem object configured with filter paths and target directories
 * @param {object} options The rule configuration
 * @returns {Promise<Result>} The lint rule result
 */
async function fileNoBrokenLinks(fs, options) {
  const files = await fs.findAllFiles(options.globsAll, !!options.nocase)

  if (files.length === 0) {
    return new Result(
      'Did not find file matching the specified patterns',
      options.globsAll.map(f => {
        return { passed: false, pattern: f }
      }),
      !!options['succeed-on-non-existent']
    )
  }

  // for every file check every broken link
  const results = await Promise.all(
    files.map(async f => {
      // render it, if possible
      const absMdPath = path.resolve(fs.targetDir, f)
      const rendered = await GitHubMarkup.renderMarkup(absMdPath)
      if (rendered === null) {
        return {
          passed: true,
          path: f,
          message: 'Ignored due to unknown file format.'
        }
      }

      // scan the rendered HTML, collecting every link the checker reports
      const linkRes = await new Promise((resolve, reject) => {
        const found = []
        // NOTE(review): acceptedSchemes is passed together with the event
        // handlers, as before; presumably it was meant to be a checker
        // option - confirm against the broken-link-checker documentation.
        const htmlChecker = new HtmlChecker(options, {
          link: res => found.push(res),
          complete: () => resolve(found),
          acceptedSchemes: ['http', 'https', '']
        })
        if (!htmlChecker.scan(rendered)) {
          reject(new Error(`Unable to scan file ${f}`))
        }
      })

      // split the broken links into invalid (possibly local paths) and otherwise failing
      const brokenLinks = linkRes.filter(({ broken }) => broken)
      const invalid = brokenLinks.filter(
        ({ brokenReason }) => brokenReason === BLC_INVALID
      )
      const failing = brokenLinks.filter(
        ({ brokenReason }) => brokenReason !== BLC_INVALID
      )

      // invalid links may actually be filesystem paths, so double check them
      const invalidMessagesWithNulls = await Promise.all(
        invalid.map(({ url: { original } }) =>
          describeInvalidLink(fs, original)
        )
      )
      const invalidMessages = invalidMessagesWithNulls.filter(m => m !== null)
      const failingMessages = failing.map(describeFailingLink)

      // join all the messages together to form the result
      const allMessages = invalidMessages.concat(failingMessages)
      return {
        passed: allMessages.length === 0,
        path: f,
        message:
          allMessages.length === 0
            ? 'All links are valid'
            : allMessages.join(', ')
      }
    })
  )

  // return the final result
  const passed = results.every(({ passed }) => passed)
  return new Result('', results, passed)
}

/**
 * Builds the failure message for a broken link that is not BLC_INVALID.
 *
 * @param {object} link A link result emitted by the HTML checker
 * @returns {string} The original URL followed by the status code or error reason
 */
function describeFailingLink(link) {
  const original = link.url.original
  const brokenReason = link.brokenReason
  if (!brokenReason.includes('HTTP')) {
    return `${original} (unknown error ${brokenReason})`
  }
  // the response may be absent, so never assume it is an object
  const response = link.http ? link.http.response : null
  const statusCode =
    response && response.statusCode !== undefined ? response.statusCode : null
  return `${original} (status code ${statusCode})`
}

/**
 * Checks whether a link flagged BLC_INVALID is a relative path to a file that
 * exists inside the target directory.
 *
 * @param {FileSystem} fs The filesystem object used to resolve and test paths
 * @param {string} original The link exactly as written in the document
 * @returns {Promise<string|null>} A failure message, or null when the link is
 * acceptable or cannot be checked
 */
async function describeInvalidLink(fs, original) {
  // verify the path is relative, else the path is invalid
  if (path.posix.isAbsolute(original)) return `${original} (invalid path)`
  // strip the #fragment from the path, since it's too hard to check
  const hashIndex = original.indexOf('#')
  const strippedPath =
    hashIndex === -1 ? original : original.slice(0, hashIndex)
  // a bare fragment points into the same document
  if (!strippedPath) return null
  // verify the path doesn't traverse outside the project, else the path is excluded
  const targetDir = path.posix.resolve(fs.targetDir)
  const absPath = path.posix.resolve(targetDir, strippedPath)
  const relPath = path.posix.relative(targetDir, absPath)
  // compare whole path segments so names like "..notes.md" are still checked
  if (relPath === '..' || relPath.startsWith('../')) return null
  // verify the file exists (or at least that we have access to it)
  if (!(await fs.relativeFileExists(relPath))) {
    return `${original} (file does not exist)`
  }
  return null
}
132+
133+
module.exports = fileNoBrokenLinks

tests/lib/MarkdownForTest.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# A Section
2+
3+
Some text.

tests/lib/github_markup_tests.js

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright 2017 TODO Group. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
const chai = require('chai')
5+
const expect = chai.expect
6+
const GitHubMarkup = require('../../lib/github_markup')
7+
const { commandExists } = require('../../lib/command_exists')
8+
9+
describe('lib', () => {
  describe('github_markup', function () {
    // rendering shells out to an external command, so allow plenty of time
    this.timeout(30000)

    // commandExists is asynchronous, so availability has to be resolved in a
    // hook; testing the returned promise directly is always truthy and
    // would never skip anything.
    before(async function () {
      const command = await commandExists(['github-markup'])
      if (command === null) {
        // marks every test in this suite as pending
        this.skip()
      }
    })

    it('should render a markdown file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/MarkdownForTest.md`
      )
      expect(res).to.contain('Some text')
    })

    it('should render an rst file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/rst_for_test.rst`
      )
      expect(res).to.contain(
        'https://opensource.newrelic.com/oss-category/#community-plus'
      )
    })

    it('should fail to render a non-markup file', async () => {
      const res = await GitHubMarkup.renderMarkup(
        `${__dirname}/image_for_test.png`
      )
      expect(res).to.equal(null)
    })

    it("should fail to render a file that doesn't exist", async () => {
      const res = await GitHubMarkup.renderMarkup(`${__dirname}/not_a_file`)
      expect(res).to.equal(null)
    })
  })
})

tests/lib/rst_for_test.rst

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
|header|
2+
3+
.. |header| image:: https://github.com/newrelic/opensource-website/raw/master/src/images/categories/Community_Plus.png
4+
:target: https://opensource.newrelic.com/oss-category/#community-plus
5+
6+
New Relic Python Agent
7+
======================
8+
9+
The ``newrelic`` package instruments your application for performance monitoring and advanced performance analytics with `New Relic`_.

0 commit comments

Comments
 (0)