Skip to content

Commit

Permalink
url: treat special characters in hostnames more strictly in url.parse()
Browse files Browse the repository at this point in the history
Remove tabs/newlines/carriage returns, similar to WHATWG URL.

Throw if ^, |, and some other special characters are in the hostname,
similar to WHATWG URL.

Use punycode/toAscii when % appears in a hostname, like WHATWG URL.
  • Loading branch information
Trott committed Oct 19, 2022
1 parent 87cdf7d commit 380bbd3
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 40 deletions.
9 changes: 2 additions & 7 deletions lib/internal/idna.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
'use strict';

if (internalBinding('config').hasIntl) {
const { toASCII, toUnicode } = internalBinding('icu');
module.exports = { toASCII, toUnicode };
} else {
const { domainToASCII, domainToUnicode } = require('internal/url');
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
}
const { domainToASCII, domainToUnicode } = require('internal/url');
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
49 changes: 28 additions & 21 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ const slashedProtocol = new SafeSet([
const {
CHAR_SPACE,
CHAR_TAB,
CHAR_CARRIAGE_RETURN,
CHAR_LINE_FEED,
CHAR_NO_BREAK_SPACE,
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
CHAR_HASH,
Expand Down Expand Up @@ -314,41 +312,42 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
let hostEnd = -1;
let atSign = -1;
let nonHost = -1;
let invalid = -1;
for (let i = 0; i < rest.length; ++i) {
switch (rest.charCodeAt(i)) {
case CHAR_TAB:
case CHAR_LINE_FEED:
case CHAR_CARRIAGE_RETURN:
case CHAR_SPACE:
case CHAR_DOUBLE_QUOTE:
case CHAR_PERCENT:
case CHAR_SINGLE_QUOTE:
case CHAR_SEMICOLON:
case CHAR_LEFT_ANGLE_BRACKET:
case CHAR_RIGHT_ANGLE_BRACKET:
case CHAR_BACKWARD_SLASH:
case CHAR_CIRCUMFLEX_ACCENT:
case CHAR_VERTICAL_LINE:
case CHAR_TAB:
case CHAR_DOUBLE_QUOTE:
case CHAR_SINGLE_QUOTE:
case CHAR_GRAVE_ACCENT:
case CHAR_LEFT_CURLY_BRACKET:
case CHAR_VERTICAL_LINE:
case CHAR_RIGHT_CURLY_BRACKET:
// Characters that are never ever allowed in a hostname from RFC 2396
if (nonHost === -1)
nonHost = i;
// Characters that are never ever allowed in a hostname from RFC 2396
if (invalid === -1) {
invalid = i;
}
break;
case CHAR_SEMICOLON:
case CHAR_BACKWARD_SLASH:
case CHAR_HASH:
case CHAR_FORWARD_SLASH:
case CHAR_QUESTION_MARK:
// Find the first instance of any host-ending characters
if (nonHost === -1)
if (nonHost === -1) {
nonHost = i;
}
hostEnd = i;
break;
case CHAR_AT:
// At this point, either we have an explicit point where the
// auth portion cannot go past, or the last @ char is the decider.
atSign = i;
nonHost = -1;
invalid = -1;
break;
}
if (hostEnd !== -1)
Expand All @@ -360,10 +359,18 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
start = atSign + 1;
}
if (nonHost === -1) {
if (invalid !== -1) {
throw new ERR_INVALID_URL(url);
}
this.host = rest.slice(start);
rest = '';
} else {
if (invalid > -1 && invalid < nonHost) {
throw new ERR_INVALID_URL(url);
}
this.host = rest.slice(start, nonHost);
// WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too.
this.host = this.host.replaceAll(/[\t\n\r]/g, '');
rest = rest.slice(nonHost);
}

Expand Down Expand Up @@ -505,13 +512,13 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
function getHostname(self, rest, hostname, url) {
for (let i = 0; i < hostname.length; ++i) {
const code = hostname.charCodeAt(i);
const isValid = (code !== CHAR_FORWARD_SLASH &&
code !== CHAR_BACKWARD_SLASH &&
code !== CHAR_HASH &&
code !== CHAR_QUESTION_MARK &&
code !== CHAR_COLON);
const isHostEnd = (code === CHAR_FORWARD_SLASH ||
code === CHAR_BACKWARD_SLASH ||
code === CHAR_HASH ||
code === CHAR_QUESTION_MARK ||
code === CHAR_COLON);

if (!isValid) {
if (isHostEnd) {
// If leftover starts with :, then it represents an invalid port.
if (hostname.charCodeAt(i) === 58) {
throw new ERR_INVALID_URL(url);
Expand Down
6 changes: 0 additions & 6 deletions test/parallel/test-url-format.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,6 @@ const formatTests = {
hash: '#frag=?bar/#frag',
pathname: '/'
},
'http://google.com" onload="alert(42)/': {
href: 'http://google.com/%22%20onload=%22alert(42)/',
protocol: 'http:',
host: 'google.com',
pathname: '/%22%20onload=%22alert(42)/'
},
'http://a.com/a/b/c?s#h': {
href: 'http://a.com/a/b/c?s#h',
protocol: 'http',
Expand Down
12 changes: 6 additions & 6 deletions test/parallel/test-url-parse-format.js
Original file line number Diff line number Diff line change
Expand Up @@ -854,15 +854,15 @@ const parseTests = {
protocol: 'http:',
slashes: true,
auth: 'a\r" \t\n<\'b:b',
host: 'c',
host: 'cd',
port: null,
hostname: 'c',
hostname: 'cd',
hash: null,
search: '?f',
query: 'f',
pathname: '%0D%0Ad/e',
path: '%0D%0Ad/e?f',
href: 'http://a%0D%22%20%09%0A%3C\'b:b@c/%0D%0Ad/e?f'
pathname: '/e',
path: '/e?f',
href: "http://a%0D%22%20%09%0A%3C'b:b@cd/e?f",
},

'https://*': {
Expand Down Expand Up @@ -1007,7 +1007,7 @@ for (const u in parseTests) {
assert.deepStrictEqual(
actual,
expected,
`expected ${inspect(expected)}, got ${inspect(actual)}`
`parsing ${inspect(u)}, expected ${inspect(expected)}, got ${inspect(actual)}`
);
assert.deepStrictEqual(
spaced,
Expand Down
15 changes: 15 additions & 0 deletions test/parallel/test-url-parse-invalid-input.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,18 @@ if (common.hasIntl) {
`parsing ${badURL}`);
});
}

{
const badChars = [ ' ', '>', '<', '^', '|' ];
for (const badChar of badChars) {
const badURL = `http://fail${badChar}fail.com/`;
assert.throws(() => { url.parse(badURL); },
(e) => e.code === 'ERR_INVALID_URL',
`parsing "${badURL}"`);
}
}

assert.throws(() => { url.parse('http://google.com" onload="alert(42)/'); },
(e) => e.code === 'ERR_INVALID_URL',
'parsing \'http://google.com" onload="alert(42)/\''
);

0 comments on commit 380bbd3

Please sign in to comment.