Skip to content

Commit 4eb0749

Browse files
thisalihassan authored and targos committed
url: implement parse method for safer URL parsing
Implement the static parse method as per the WHATWG URL specification. Unlike the URL constructor, URL.parse does not throw on invalid input, instead returning null. This behavior allows safer parsing of URLs without the need for try-catch blocks around constructor calls. The implementation follows the steps outlined in the WHATWG URL standard, ensuring compatibility and consistency with web platform URL parsing APIs.

Fixes: #52208
Refs: whatwg/url#825
PR-URL: #52280
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Matteo Collina <[email protected]>
Reviewed-By: Daniel Lemire <[email protected]>
Reviewed-By: Benjamin Gruenbaum <[email protected]>
1 parent 2fb7cc9 commit 4eb0749

File tree

5 files changed

+267
-5
lines changed

5 files changed

+267
-5
lines changed

lib/internal/url.js

+22-2
Original file line number | Diff line number | Diff line change
@@ -764,6 +764,14 @@ function isURL(self) {
764764
return Boolean(self?.href && self.protocol && self.auth === undefined && self.path === undefined);
765765
}
766766

767+
/**
768+
* A unique symbol used as a private identifier to safely invoke the URL constructor
769+
* with a special parsing behavior. When passed as the third argument to the URL
770+
* constructor, it signals that the constructor should not throw an exception
771+
* for invalid URL inputs.
772+
*/
773+
const kParseURLSymbol = Symbol('kParseURL');
774+
767775
class URL {
768776
#context = new URLContext();
769777
#searchParams;
@@ -782,7 +790,7 @@ class URL {
782790
};
783791
}
784792

785-
constructor(input, base = undefined) {
793+
constructor(input, base = undefined, parseSymbol = undefined) {
786794
if (arguments.length === 0) {
787795
throw new ERR_MISSING_ARGS('url');
788796
}
@@ -794,7 +802,19 @@ class URL {
794802
base = `${base}`;
795803
}
796804

797-
this.#updateContext(bindingUrl.parse(input, base));
805+
const raiseException = parseSymbol !== kParseURLSymbol;
806+
const href = bindingUrl.parse(input, base, raiseException);
807+
if (href) {
808+
this.#updateContext(href);
809+
}
810+
}
811+
812+
static parse(input, base = undefined) {
813+
if (arguments.length === 0) {
814+
throw new ERR_MISSING_ARGS('url');
815+
}
816+
const parsedURLObject = new URL(input, base, kParseURLSymbol);
817+
return parsedURLObject.href ? parsedURLObject : null;
798818
}
799819

800820
[inspect.custom](depth, opts) {

src/node_url.cc

+9-2
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,9 @@ void BindingData::Parse(const FunctionCallbackInfo<Value>& args) {
262262
CHECK_GE(args.Length(), 1);
263263
CHECK(args[0]->IsString()); // input
264264
// args[1] // base url
265+
// args[2] // raise Exception
266+
267+
const bool raise_exception = args.Length() > 2 && args[2]->IsTrue();
265268

266269
Realm* realm = Realm::GetCurrent(args);
267270
BindingData* binding_data = realm->GetBindingData<BindingData>();
@@ -274,16 +277,20 @@ void BindingData::Parse(const FunctionCallbackInfo<Value>& args) {
274277
if (args[1]->IsString()) {
275278
base_ = Utf8Value(isolate, args[1]).ToString();
276279
base = ada::parse<ada::url_aggregator>(*base_);
277-
if (!base) {
280+
if (!base && raise_exception) {
278281
return ThrowInvalidURL(realm->env(), input.ToStringView(), base_);
282+
} else if (!base) {
283+
return;
279284
}
280285
base_pointer = &base.value();
281286
}
282287
auto out =
283288
ada::parse<ada::url_aggregator>(input.ToStringView(), base_pointer);
284289

285-
if (!out) {
290+
if (!out && raise_exception) {
286291
return ThrowInvalidURL(realm->env(), input.ToStringView(), base_);
292+
} else if (!out) {
293+
return;
287294
}
288295

289296
binding_data->UpdateComponents(out->get_components(), out->type);

test/fixtures/wpt/README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,4 +36,4 @@ Last update:
3636
- webmessaging/broadcastchannel: https://github.com/web-platform-tests/wpt/tree/e97fac4791/webmessaging/broadcastchannel
3737

3838
[Web Platform Tests]: https://github.com/web-platform-tests/wpt
39-
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt
39+
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt

test/fixtures/wpt/url/resources/urltestdata.json

+185
Original file line number | Diff line number | Diff line change
@@ -734,6 +734,36 @@
734734
"search": "",
735735
"hash": ""
736736
},
737+
{
738+
"input": "http://a:b@c\\",
739+
"base": null,
740+
"href": "http://a:b@c/",
741+
"origin": "http://c",
742+
"protocol": "http:",
743+
"username": "a",
744+
"password": "b",
745+
"host": "c",
746+
"hostname": "c",
747+
"port": "",
748+
"pathname": "/",
749+
"search": "",
750+
"hash": ""
751+
},
752+
{
753+
"input": "ws://a@b\\c",
754+
"base": null,
755+
"href": "ws://a@b/c",
756+
"origin": "ws://b",
757+
"protocol": "ws:",
758+
"username": "a",
759+
"password": "",
760+
"host": "b",
761+
"hostname": "b",
762+
"port": "",
763+
"pathname": "/c",
764+
"search": "",
765+
"hash": ""
766+
},
737767
{
738768
"input": "foo:/",
739769
"base": "http://example.org/foo/bar",
@@ -9627,5 +9657,160 @@
96279657
"pathname": "",
96289658
"search": "",
96299659
"hash": ""
9660+
},
9661+
"Scheme relative path starting with multiple slashes",
9662+
{
9663+
"input": "///test",
9664+
"base": "http://example.org/",
9665+
"href": "http://test/",
9666+
"protocol": "http:",
9667+
"username": "",
9668+
"password": "",
9669+
"host": "test",
9670+
"hostname": "test",
9671+
"port": "",
9672+
"pathname": "/",
9673+
"search": "",
9674+
"hash": ""
9675+
},
9676+
{
9677+
"input": "///\\//\\//test",
9678+
"base": "http://example.org/",
9679+
"href": "http://test/",
9680+
"protocol": "http:",
9681+
"username": "",
9682+
"password": "",
9683+
"host": "test",
9684+
"hostname": "test",
9685+
"port": "",
9686+
"pathname": "/",
9687+
"search": "",
9688+
"hash": ""
9689+
},
9690+
{
9691+
"input": "///example.org/path",
9692+
"base": "http://example.org/",
9693+
"href": "http://example.org/path",
9694+
"protocol": "http:",
9695+
"username": "",
9696+
"password": "",
9697+
"host": "example.org",
9698+
"hostname": "example.org",
9699+
"port": "",
9700+
"pathname": "/path",
9701+
"search": "",
9702+
"hash": ""
9703+
},
9704+
{
9705+
"input": "///example.org/../path",
9706+
"base": "http://example.org/",
9707+
"href": "http://example.org/path",
9708+
"protocol": "http:",
9709+
"username": "",
9710+
"password": "",
9711+
"host": "example.org",
9712+
"hostname": "example.org",
9713+
"port": "",
9714+
"pathname": "/path",
9715+
"search": "",
9716+
"hash": ""
9717+
},
9718+
{
9719+
"input": "///example.org/../../",
9720+
"base": "http://example.org/",
9721+
"href": "http://example.org/",
9722+
"protocol": "http:",
9723+
"username": "",
9724+
"password": "",
9725+
"host": "example.org",
9726+
"hostname": "example.org",
9727+
"port": "",
9728+
"pathname": "/",
9729+
"search": "",
9730+
"hash": ""
9731+
},
9732+
{
9733+
"input": "///example.org/../path/../../",
9734+
"base": "http://example.org/",
9735+
"href": "http://example.org/",
9736+
"protocol": "http:",
9737+
"username": "",
9738+
"password": "",
9739+
"host": "example.org",
9740+
"hostname": "example.org",
9741+
"port": "",
9742+
"pathname": "/",
9743+
"search": "",
9744+
"hash": ""
9745+
},
9746+
{
9747+
"input": "///example.org/../path/../../path",
9748+
"base": "http://example.org/",
9749+
"href": "http://example.org/path",
9750+
"protocol": "http:",
9751+
"username": "",
9752+
"password": "",
9753+
"host": "example.org",
9754+
"hostname": "example.org",
9755+
"port": "",
9756+
"pathname": "/path",
9757+
"search": "",
9758+
"hash": ""
9759+
},
9760+
{
9761+
"input": "/\\/\\//example.org/../path",
9762+
"base": "http://example.org/",
9763+
"href": "http://example.org/path",
9764+
"protocol": "http:",
9765+
"username": "",
9766+
"password": "",
9767+
"host": "example.org",
9768+
"hostname": "example.org",
9769+
"port": "",
9770+
"pathname": "/path",
9771+
"search": "",
9772+
"hash": ""
9773+
},
9774+
{
9775+
"input": "///abcdef/../",
9776+
"base": "file:///",
9777+
"href": "file:///",
9778+
"protocol": "file:",
9779+
"username": "",
9780+
"password": "",
9781+
"host": "",
9782+
"hostname": "",
9783+
"port": "",
9784+
"pathname": "/",
9785+
"search": "",
9786+
"hash": ""
9787+
},
9788+
{
9789+
"input": "/\\//\\/a/../",
9790+
"base": "file:///",
9791+
"href": "file://////",
9792+
"protocol": "file:",
9793+
"username": "",
9794+
"password": "",
9795+
"host": "",
9796+
"hostname": "",
9797+
"port": "",
9798+
"pathname": "////",
9799+
"search": "",
9800+
"hash": ""
9801+
},
9802+
{
9803+
"input": "//a/../",
9804+
"base": "file:///",
9805+
"href": "file://a/",
9806+
"protocol": "file:",
9807+
"username": "",
9808+
"password": "",
9809+
"host": "a",
9810+
"hostname": "a",
9811+
"port": "",
9812+
"pathname": "/",
9813+
"search": "",
9814+
"hash": ""
96309815
}
96319816
]
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
// This intentionally does not use resources/urltestdata.json to preserve resources.
2+
[
3+
{
4+
"url": undefined,
5+
"base": undefined,
6+
"expected": false
7+
},
8+
{
9+
"url": "aaa:b",
10+
"base": undefined,
11+
"expected": true
12+
},
13+
{
14+
"url": undefined,
15+
"base": "aaa:b",
16+
"expected": false
17+
},
18+
{
19+
"url": "aaa:/b",
20+
"base": undefined,
21+
"expected": true
22+
},
23+
{
24+
"url": undefined,
25+
"base": "aaa:/b",
26+
"expected": true
27+
},
28+
{
29+
"url": "https://test:test",
30+
"base": undefined,
31+
"expected": false
32+
},
33+
{
34+
"url": "a",
35+
"base": "https://b/",
36+
"expected": true
37+
}
38+
].forEach(({ url, base, expected }) => {
39+
test(() => {
40+
if (expected == false) {
41+
assert_equals(URL.parse(url, base), null);
42+
} else {
43+
assert_equals(URL.parse(url, base).href, new URL(url, base).href);
44+
}
45+
}, `URL.parse(${url}, ${base})`);
46+
});
47+
48+
test(() => {
49+
assert_not_equals(URL.parse("https://example/"), URL.parse("https://example/"));
50+
}, `URL.parse() should return a unique object`);

0 commit comments

Comments
 (0)