-
Notifications
You must be signed in to change notification settings - Fork 29.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
url: enforce valid UTF-8 in WHATWG parser
This commit implements the Web IDL USVString conversion, which mandates all unpaired Unicode surrogates be turned into U+FFFD REPLACEMENT CHARACTER. It also disallows Symbols to be used as USVString per spec. Certain functions call into C++ methods in the binding that use the Utf8Value class to access string arguments. Utf8Value already does the normalization using V8's String::Write, so in those cases, instead of doing the full USVString normalization, only a symbol check is done (`'' + val`, which uses ES's ToString, versus `String()` which has special provisions for symbols). PR-URL: #11436 Reviewed-By: Ben Noordhuis <[email protected]> Reviewed-By: James M Snell <[email protected]>
- Loading branch information
Showing
13 changed files
with
509 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,18 @@ const IteratorPrototype = Object.getPrototypeOf( | |
Object.getPrototypeOf([][Symbol.iterator]()) | ||
); | ||
|
||
// Finds the first unpaired UTF-16 surrogate in a string: either a low
// surrogate that is not preceded by a high surrogate, or a high surrogate that
// is not followed by a low surrogate. The regex deliberately has no `g`/`y`
// flag, so `exec()` is stateless and the literal can be shared.
const unpairedSurrogateRe =
  /([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/;

/**
 * Converts a value to a string following the Web IDL USVString conversion.
 *
 * @param {*} val Value to convert. Symbols are rejected with a TypeError.
 * @returns {string} The stringified value. When it contains no unpaired
 *   surrogate it is returned as is; otherwise the result of
 *   `binding.toUSVString()` is returned (expected to replace unpaired
 *   surrogates with U+FFFD -- confirm against the native binding).
 */
function toUSVString(val) {
  // A template literal performs ES ToString(val). `'' + val` would instead run
  // ToPrimitive with the default hint, letting `valueOf()` take precedence
  // over `toString()`. Both forms throw a TypeError for Symbols.
  const str = `${val}`;

  // As of V8 5.5, `str.search()` (and `unpairedSurrogateRe[@@search]()`) are
  // slower than `unpairedSurrogateRe.exec()`.
  const match = unpairedSurrogateRe.exec(str);
  if (!match)
    return str;
  // `match.index` is where the regex match starts; for a lone low surrogate
  // that can be the code unit just before it. Everything ahead of that index
  // is already well formed.
  return binding.toUSVString(str, match.index);
}
|
||
class OpaqueOrigin { | ||
toString() { | ||
return 'null'; | ||
|
@@ -104,7 +116,6 @@ function onParseComplete(flags, protocol, username, password, | |
|
||
// Reused by URL constructor and URL#href setter. | ||
function parse(url, input, base) { | ||
input = String(input); | ||
const base_context = base ? base[context] : undefined; | ||
url[context] = new StorageObject(); | ||
binding.parse(input.trim(), -1, | ||
|
@@ -206,8 +217,10 @@ function onParseHashComplete(flags, protocol, username, password, | |
|
||
class URL { | ||
constructor(input, base) { | ||
// toUSVString is not needed. | ||
input = '' + input; | ||
if (base !== undefined && !(base instanceof URL)) | ||
base = new URL(String(base)); | ||
base = new URL(base); | ||
parse(this, input, base); | ||
} | ||
|
||
|
@@ -315,6 +328,8 @@ Object.defineProperties(URL.prototype, { | |
return this[kFormat]({}); | ||
}, | ||
set(input) { | ||
// toUSVString is not needed. | ||
input = '' + input; | ||
parse(this, input); | ||
} | ||
}, | ||
|
@@ -332,7 +347,8 @@ Object.defineProperties(URL.prototype, { | |
return this[context].scheme; | ||
}, | ||
set(scheme) { | ||
scheme = String(scheme); | ||
// toUSVString is not needed. | ||
scheme = '' + scheme; | ||
if (scheme.length === 0) | ||
return; | ||
binding.parse(scheme, binding.kSchemeStart, null, this[context], | ||
|
@@ -346,7 +362,8 @@ Object.defineProperties(URL.prototype, { | |
return this[context].username || ''; | ||
}, | ||
set(username) { | ||
username = String(username); | ||
// toUSVString is not needed. | ||
username = '' + username; | ||
if (!this.hostname) | ||
return; | ||
const ctx = this[context]; | ||
|
@@ -366,7 +383,8 @@ Object.defineProperties(URL.prototype, { | |
return this[context].password || ''; | ||
}, | ||
set(password) { | ||
password = String(password); | ||
// toUSVString is not needed. | ||
password = '' + password; | ||
if (!this.hostname) | ||
return; | ||
const ctx = this[context]; | ||
|
@@ -391,7 +409,8 @@ Object.defineProperties(URL.prototype, { | |
}, | ||
set(host) { | ||
const ctx = this[context]; | ||
host = String(host); | ||
// toUSVString is not needed. | ||
host = '' + host; | ||
if (this[cannotBeBase] || | ||
(this[special] && host.length === 0)) { | ||
// Cannot set the host if cannot-be-base is set or | ||
|
@@ -415,7 +434,8 @@ Object.defineProperties(URL.prototype, { | |
}, | ||
set(host) { | ||
const ctx = this[context]; | ||
host = String(host); | ||
// toUSVString is not needed. | ||
host = '' + host; | ||
if (this[cannotBeBase] || | ||
(this[special] && host.length === 0)) { | ||
// Cannot set the host if cannot-be-base is set or | ||
|
@@ -439,11 +459,12 @@ Object.defineProperties(URL.prototype, { | |
return port === undefined ? '' : String(port); | ||
}, | ||
set(port) { | ||
// toUSVString is not needed. | ||
port = '' + port; | ||
const ctx = this[context]; | ||
if (!ctx.host || this[cannotBeBase] || | ||
this.protocol === 'file:') | ||
return; | ||
port = String(port); | ||
if (port === '') { | ||
ctx.port = undefined; | ||
return; | ||
|
@@ -462,9 +483,11 @@ Object.defineProperties(URL.prototype, { | |
return ctx.path !== undefined ? `/${ctx.path.join('/')}` : ''; | ||
}, | ||
set(path) { | ||
// toUSVString is not needed. | ||
path = '' + path; | ||
if (this[cannotBeBase]) | ||
return; | ||
binding.parse(String(path), binding.kPathStart, null, this[context], | ||
binding.parse(path, binding.kPathStart, null, this[context], | ||
onParsePathComplete.bind(this)); | ||
} | ||
}, | ||
|
@@ -477,7 +500,7 @@ Object.defineProperties(URL.prototype, { | |
}, | ||
set(search) { | ||
const ctx = this[context]; | ||
search = String(search); | ||
search = toUSVString(search); | ||
if (!search) { | ||
ctx.query = null; | ||
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; | ||
|
@@ -509,7 +532,8 @@ Object.defineProperties(URL.prototype, { | |
}, | ||
set(hash) { | ||
const ctx = this[context]; | ||
hash = String(hash); | ||
// toUSVString is not needed. | ||
hash = '' + hash; | ||
if (this.protocol === 'javascript:') | ||
return; | ||
if (!hash) { | ||
|
@@ -652,19 +676,22 @@ class URLSearchParams { | |
if (pair.length !== 2) { | ||
throw new TypeError('Each query pair must be a name/value tuple'); | ||
} | ||
this[searchParams].push(String(pair[0]), String(pair[1])); | ||
const key = toUSVString(pair[0]); | ||
const value = toUSVString(pair[1]); | ||
this[searchParams].push(key, value); | ||
} | ||
} else { | ||
// record<USVString, USVString> | ||
this[searchParams] = []; | ||
for (const key of Object.keys(init)) { | ||
const value = String(init[key]); | ||
for (var key of Object.keys(init)) { | ||
key = toUSVString(key); | ||
const value = toUSVString(init[key]); | ||
this[searchParams].push(key, value); | ||
} | ||
} | ||
} else { | ||
// USVString | ||
init = String(init); | ||
init = toUSVString(init); | ||
if (init[0] === '?') init = init.slice(1); | ||
initSearchParams(this, init); | ||
} | ||
|
@@ -743,8 +770,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
throw new TypeError('"name" and "value" arguments must be specified'); | ||
} | ||
|
||
name = String(name); | ||
value = String(value); | ||
name = toUSVString(name); | ||
value = toUSVString(value); | ||
this[searchParams].push(name, value); | ||
update(this[context], this); | ||
}, | ||
|
@@ -758,7 +785,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
} | ||
|
||
const list = this[searchParams]; | ||
name = String(name); | ||
name = toUSVString(name); | ||
for (var i = 0; i < list.length;) { | ||
const cur = list[i]; | ||
if (cur === name) { | ||
|
@@ -779,7 +806,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
} | ||
|
||
const list = this[searchParams]; | ||
name = String(name); | ||
name = toUSVString(name); | ||
for (var i = 0; i < list.length; i += 2) { | ||
if (list[i] === name) { | ||
return list[i + 1]; | ||
|
@@ -798,7 +825,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
|
||
const list = this[searchParams]; | ||
const values = []; | ||
name = String(name); | ||
name = toUSVString(name); | ||
for (var i = 0; i < list.length; i += 2) { | ||
if (list[i] === name) { | ||
values.push(list[i + 1]); | ||
|
@@ -816,7 +843,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
} | ||
|
||
const list = this[searchParams]; | ||
name = String(name); | ||
name = toUSVString(name); | ||
for (var i = 0; i < list.length; i += 2) { | ||
if (list[i] === name) { | ||
return true; | ||
|
@@ -834,8 +861,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { | |
} | ||
|
||
const list = this[searchParams]; | ||
name = String(name); | ||
value = String(value); | ||
name = toUSVString(name); | ||
value = toUSVString(value); | ||
|
||
// If there are any name-value pairs whose name is `name`, in `list`, set | ||
// the value of the first such name-value pair to `value` and remove the | ||
|
@@ -1098,11 +1125,13 @@ function originFor(url, base) { | |
} | ||
|
||
function domainToASCII(domain) { | ||
return binding.domainToASCII(String(domain)); | ||
// toUSVString is not needed. | ||
return binding.domainToASCII('' + domain); | ||
} | ||
|
||
function domainToUnicode(domain) { | ||
return binding.domainToUnicode(String(domain)); | ||
// toUSVString is not needed. | ||
return binding.domainToUnicode('' + domain); | ||
} | ||
|
||
// Utility function that converts a URL object into an ordinary | ||
|
@@ -1188,11 +1217,14 @@ function getPathFromURL(path) { | |
return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path); | ||
} | ||
|
||
exports.getPathFromURL = getPathFromURL; | ||
exports.URL = URL; | ||
exports.URLSearchParams = URLSearchParams; | ||
exports.domainToASCII = domainToASCII; | ||
exports.domainToUnicode = domainToUnicode; | ||
exports.urlToOptions = urlToOptions; | ||
exports.formatSymbol = kFormat; | ||
exports.searchParamsSymbol = searchParams; | ||
module.exports = { | ||
toUSVString, | ||
getPathFromURL, | ||
URL, | ||
URLSearchParams, | ||
domainToASCII, | ||
domainToUnicode, | ||
urlToOptions, | ||
formatSymbol: kFormat, | ||
searchParamsSymbol: searchParams | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
This will not perform ToString(), but instead ToPrimitive(). For example, if val is `{ toString() { return "5"; }, toPrimitive() { return "6"; } }`, you will get `"6"`.