Skip to content

Commit

Permalink
feat: add option for setting user agent (#612)
Browse files Browse the repository at this point in the history
  • Loading branch information
JustinBeckwith authored Jul 1, 2024
1 parent 0aea0f1 commit 929caa7
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 12 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ $ linkinator LOCATIONS [ --arguments ]
--url-rewrite-replace
Expression used to replace search content. Must be used with --url-rewrite-search.
--user-agent
The user agent passed in all HTTP requests. Defaults to 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
--verbosity
Override the default verbosity for this command. Available options are
'debug', 'info', 'warning', 'error', and 'none'. Defaults to 'warning'.
Expand Down Expand Up @@ -173,6 +176,7 @@ All options are optional. It should look like this:
"retryErrorsJitter": 5,
"urlRewriteSearch": "/pattern/",
"urlRewriteReplace": "replacement",
"userAgent": "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1)",
}
```

Expand Down Expand Up @@ -223,6 +227,7 @@ where the server is started. Defaults to the path passed in `path`.
- `linksToSkip` (array | function) - An array of regular expression strings that should be skipped, OR an async function that's called for each link with the link URL as its only argument. Return a Promise that resolves to `true` to skip the link or `false` to check it.
- `directoryListing` (boolean) - Automatically serve a static file listing page when serving a directory. Defaults to `false`.
- `urlRewriteExpressions` (array) - Collection of objects that contain a search pattern, and replacement.
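- `userAgent` (string) - The [user agent](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent) that should be passed with each request. Defaults to a desktop Chrome on macOS user agent string: `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36`.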

### linkinator.LinkChecker()

Expand Down
3 changes: 3 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ const cli = meow(
--url-rewrite-replace
Expression used to replace search content. Must be used with --url-rewrite-search.
--user-agent
The user agent passed in all HTTP requests. Defaults to 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
--verbosity
Override the default verbosity for this command. Available options are
'debug', 'info', 'warning', 'error', and 'none'. Defaults to 'warning'.
Expand Down
12 changes: 3 additions & 9 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,6 @@ type CrawlOptions = {
retryErrorsJitter: number;
};

// Spoof a normal looking User-Agent to keep the servers happy
export const headers = {
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
};

/**
* Instance class used to perform a crawl job.
*/
Expand Down Expand Up @@ -255,7 +249,7 @@ export class LinkChecker extends EventEmitter {
response = await request<Readable>({
method: options.crawl ? 'GET' : 'HEAD',
url: options.url.href,
headers,
headers: { 'User-Agent': options.checkOptions.userAgent },
responseType: 'stream',
validateStatus: () => true,
timeout: options.checkOptions.timeout,
Expand All @@ -269,7 +263,7 @@ export class LinkChecker extends EventEmitter {
response = await request<Readable>({
method: 'GET',
url: options.url.href,
headers,
headers: { 'User-Agent': options.checkOptions.userAgent },
responseType: 'stream',
validateStatus: () => true,
timeout: options.checkOptions.timeout,
Expand Down Expand Up @@ -299,7 +293,7 @@ export class LinkChecker extends EventEmitter {
url: options.url.href,
responseType: 'stream',
validateStatus: () => true,
headers,
headers: { 'User-Agent': options.checkOptions.userAgent },
timeout: options.checkOptions.timeout,
});
if (this.shouldRetryAfter(response, options)) {
Expand Down
5 changes: 5 additions & 0 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ export type CheckOptions = {
retryErrorsCount?: number;
retryErrorsJitter?: number;
urlRewriteExpressions?: UrlRewriteExpression[];
userAgent?: string;
};

export type InternalCheckOptions = {
syntheticServerRoot?: string;
staticHttpServerHost?: string;
} & CheckOptions;

/**
 * Fallback `User-Agent` value applied when the caller does not provide
 * `userAgent` in the check options. The string imitates a desktop Chrome
 * browser on macOS rather than identifying the tool itself.
 */
export const DEFAULT_USER_AGENT =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36';

/**
* Validate the provided flags all work with each other.
* @param options CheckOptions passed in from the CLI (or API)
Expand Down Expand Up @@ -75,6 +79,7 @@ export async function processOptions(
);
}

options.userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
options.serverRoot &&= path.normalize(options.serverRoot);

// Expand globs into paths
Expand Down
21 changes: 18 additions & 3 deletions test/test.index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import {
LinkChecker,
LinkState,
check,
headers,
} from '../src/index.js';
import { DEFAULT_USER_AGENT } from '../src/options.js';

nock.disableNetConnect();
nock.enableNetConnect('localhost');
Expand Down Expand Up @@ -450,12 +450,16 @@ describe('linkinator', () => {
it('should always send a human looking User-Agent', async () => {
const scopes = [
nock('http://fake.local')
.get('/', undefined, { reqheaders: headers })
.get('/', undefined, {
reqheaders: { 'User-Agent': DEFAULT_USER_AGENT },
})
.replyWithFile(200, 'test/fixtures/local/index.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
nock('http://fake.local')
.get('/page2.html', undefined, { reqheaders: headers })
.get('/page2.html', undefined, {
reqheaders: { 'User-Agent': DEFAULT_USER_AGENT },
})
.replyWithFile(200, 'test/fixtures/local/page2.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
Expand Down Expand Up @@ -594,4 +598,15 @@ describe('linkinator', () => {
});
assert.ok(results.passed);
});

it('should accept a custom user agent', async () => {
  // The mocked endpoint only matches a HEAD request carrying this exact
  // User-Agent header value.
  const customAgent = 'linkinator-test';
  const headScope = nock('http://fake.local')
    .head('/')
    .matchHeader('user-agent', customAgent)
    .reply(200);

  const { passed } = await check({
    path: 'test/fixtures/basic',
    userAgent: customAgent,
  });

  assert.ok(passed);
  // Throws if the mocked request was never made.
  headScope.done();
});
});

0 comments on commit 929caa7

Please sign in to comment.