From 7728eee7e87b6b90870c3e0051ceb8a076a5da52 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 31 Mar 2023 09:03:06 -0400 Subject: [PATCH] url: use ada::url_aggregator for parsing urls PR-URL: https://github.com/nodejs/node/pull/47339 Reviewed-By: Tiancheng "Timothy" Gu Reviewed-By: Rich Trott --- lib/internal/url.js | 258 +++++++++--- lib/url.js | 6 +- src/node_snapshotable.cc | 1 + src/node_snapshotable.h | 1 + src/node_url.cc | 379 +++++++++--------- src/node_url.h | 56 +++ .../test-whatwg-url-custom-inspect.js | 21 +- tsconfig.json | 1 + typings/internalBinding/url.d.ts | 12 + 9 files changed, 478 insertions(+), 257 deletions(-) create mode 100644 typings/internalBinding/url.d.ts diff --git a/lib/internal/url.js b/lib/internal/url.js index 3fe78d765d4a2c..51688b8403076a 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -87,13 +87,7 @@ const querystring = require('querystring'); const { platform } = process; const isWindows = platform === 'win32'; -const { - domainToASCII: _domainToASCII, - domainToUnicode: _domainToUnicode, - parse, - canParse: _canParse, - updateUrl, -} = internalBinding('url'); +const bindingUrl = internalBinding('url'); const { storeDataObject, @@ -142,16 +136,46 @@ function lazyCryptoRandom() { // the C++ binding. // Refs: https://url.spec.whatwg.org/#concept-url class URLContext { + // This is the maximum value uint32_t can get. + // Ada uses uint32_t(-1) for declaring omitted values. + static #omitted = 4294967295; + href = ''; - origin = ''; - protocol = ''; - hostname = ''; - pathname = ''; - search = ''; - username = ''; - password = ''; - port = ''; - hash = ''; + protocol_end = 0; + username_end = 0; + host_start = 0; + host_end = 0; + pathname_start = 0; + search_start = 0; + hash_start = 0; + port = 0; + /** + * Refers to `ada::scheme::type` + * + * enum type : uint8_t { + * HTTP = 0, + * NOT_SPECIAL = 1, + * HTTPS = 2, + * WS = 3, + * FTP = 4, + * WSS = 5, + * FILE = 6 + * }; + * @type {number} + */ + scheme_type = 1; + + get hasPort() { + return this.port !== URLContext.#omitted; + } + + get hasSearch() { + return this.search_start !== URLContext.#omitted; + } + + get hasHash() { + return this.hash_start !== URLContext.#omitted; + } } function isURLSearchParams(self) { @@ -581,13 +605,13 @@ class URL { base = `${base}`; } - const isValid = parse(input, - base, - this.#onParseComplete); + const href = bindingUrl.parse(input, base); - if (!isValid) { + if (!href) { throw new ERR_INVALID_URL(input); } + + this.#updateContext(href); } [inspect.custom](depth, opts) { @@ -622,23 +646,40 @@ class URL { return `${constructor.name} ${inspect(obj, opts)}`; } - #onParseComplete = (href, origin, protocol, hostname, pathname, - search, username, password, port, hash) => { + #updateContext(href) { const ctx = this[context]; ctx.href = href; - ctx.origin = origin; - ctx.protocol = protocol; - ctx.hostname = hostname; - ctx.pathname = pathname; - ctx.search = search; - ctx.username = username; - ctx.password = password; + + const { + 0: protocol_end, + 1: username_end, + 2: host_start, + 3: host_end, + 4: port, + 5: pathname_start, + 6: search_start, + 7: hash_start, + 8: scheme_type, + } = bindingUrl.urlComponents; + + ctx.protocol_end = protocol_end; + ctx.username_end = username_end; + ctx.host_start = host_start; + ctx.host_end = host_end; ctx.port = port; - ctx.hash = hash; + ctx.pathname_start = pathname_start; + ctx.search_start = search_start; + ctx.hash_start = hash_start; + ctx.scheme_type = scheme_type; + if (this[searchParams]) { - this[searchParams][searchParams] = parseParams(search); + if (ctx.hasSearch) { + this[searchParams][searchParams] = parseParams(this.search); + } else { + this[searchParams][searchParams] = []; + } } - }; + } toString() { if (!isURL(this)) @@ -655,122 +696,210 @@ class URL { set href(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - const valid = updateUrl(this[context].href, updateActions.kHref, `${value}`, this.#onParseComplete); - if (!valid) { throw ERR_INVALID_URL(`${value}`); } + value = `${value}`; + const href = bindingUrl.update(this[context].href, updateActions.kHref, value); + if (!href) { throw ERR_INVALID_URL(value); } + this.#updateContext(href); } // readonly get origin() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].origin; + const ctx = this[context]; + const protocol = StringPrototypeSlice(ctx.href, 0, ctx.protocol_end); + + // Check if scheme_type is not `NOT_SPECIAL` + if (ctx.scheme_type !== 1) { + // Check if scheme_type is `FILE` + if (ctx.scheme_type === 6) { + return 'null'; + } + return `${protocol}//${this.host}`; + } + + if (protocol === 'blob:') { + const path = this.pathname; + if (path.length > 0) { + try { + const out = new URL(path); + if (out[context].scheme_type !== 1) { + return `${out.protocol}//${out.host}`; + } + } catch { + // Do nothing. + } + } + } + + return 'null'; } get protocol() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].protocol; + return StringPrototypeSlice(this[context].href, 0, this[context].protocol_end); } set protocol(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kProtocol, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kProtocol, `${value}`); + if (href) { + this.#updateContext(href); + } } get username() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].username; + const ctx = this[context]; + if (ctx.protocol_end + 2 < ctx.username_end) { + return StringPrototypeSlice(ctx.href, ctx.protocol_end + 2, ctx.username_end); + } + return ''; } set username(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kUsername, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kUsername, `${value}`); + if (href) { + this.#updateContext(href); + } } get password() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].password; + const ctx = this[context]; + if (ctx.host_start - ctx.username_end > 0) { + return StringPrototypeSlice(ctx.href, ctx.username_end + 1, ctx.host_start); + } + return ''; } set password(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kPassword, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kPassword, `${value}`); + if (href) { + this.#updateContext(href); + } } get host() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - const port = this[context].port; - const suffix = port.length > 0 ? `:${port}` : ''; - return this[context].hostname + suffix; + const ctx = this[context]; + let startsAt = ctx.host_start; + if (ctx.href[startsAt] === '@') { + startsAt++; + } + // If we have an empty host, then the space between components.host_end and + // components.pathname_start may be occupied by /. + if (startsAt === ctx.host_end) { + return ''; + } + return StringPrototypeSlice(ctx.href, startsAt, ctx.pathname_start); } set host(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kHost, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kHost, `${value}`); + if (href) { + this.#updateContext(href); + } } get hostname() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].hostname; + const ctx = this[context]; + let startsAt = ctx.host_start; + // host_start might be "@" if the URL has credentials + if (ctx.href[startsAt] === '@') { + startsAt++; + } + return StringPrototypeSlice(ctx.href, startsAt, ctx.host_end); } set hostname(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kHostname, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kHostname, `${value}`); + if (href) { + this.#updateContext(href); + } } get port() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].port; + if (this[context].hasPort) { + return `${this[context].port}`; + } + return ''; } set port(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kPort, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kPort, `${value}`); + if (href) { + this.#updateContext(href); + } } get pathname() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].pathname; + const ctx = this[context]; + let endsAt; + if (ctx.hasSearch) { + endsAt = ctx.search_start; + } else if (ctx.hasHash) { + endsAt = ctx.hash_start; + } + return StringPrototypeSlice(ctx.href, ctx.pathname_start, endsAt); } set pathname(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kPathname, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kPathname, `${value}`); + if (href) { + this.#updateContext(href); + } } get search() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].search; + const ctx = this[context]; + if (!ctx.hasSearch) { return ''; } + let endsAt = ctx.href.length; + if (ctx.hasHash) { endsAt = ctx.hash_start; } + if (endsAt - ctx.search_start <= 1) { return ''; } + return StringPrototypeSlice(ctx.href, ctx.search_start, endsAt); } set search(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kSearch, toUSVString(value), this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kSearch, toUSVString(value)); + if (href) { + this.#updateContext(href); + } } // readonly get searchParams() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - // Create URLSearchParams on demand to greatly improve the URL performance. if (this[searchParams] == null) { - this[searchParams] = new URLSearchParams(this[context].search); + this[searchParams] = new URLSearchParams(this.search); this[searchParams][context] = this; } return this[searchParams]; @@ -779,13 +908,20 @@ class URL { get hash() { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].hash; + const ctx = this[context]; + if (!ctx.hasHash || (ctx.href.length - ctx.hash_start <= 1)) { + return ''; + } + return StringPrototypeSlice(ctx.href, ctx.hash_start); } set hash(value) { if (!isURL(this)) throw new ERR_INVALID_THIS('URL'); - updateUrl(this[context].href, updateActions.kHash, `${value}`, this.#onParseComplete); + const href = bindingUrl.update(this[context].href, updateActions.kHash, `${value}`); + if (href) { + this.#updateContext(href); + } } toJSON() { @@ -801,7 +937,7 @@ class URL { base = `${base}`; } - return _canParse(url, base); + return bindingUrl.canParse(url, base); } static createObjectURL(obj) { @@ -1164,7 +1300,7 @@ function domainToASCII(domain) { throw new ERR_MISSING_ARGS('domain'); // toUSVString is not needed. - return _domainToASCII(`${domain}`); + return bindingUrl.domainToASCII(`${domain}`); } function domainToUnicode(domain) { @@ -1172,7 +1308,7 @@ function domainToUnicode(domain) { throw new ERR_MISSING_ARGS('domain'); // toUSVString is not needed. - return _domainToUnicode(`${domain}`); + return bindingUrl.domainToUnicode(`${domain}`); } /** @@ -1355,4 +1491,6 @@ module.exports = { urlToHttpOptions, encodeStr, isURL, + + urlUpdateActions: updateActions, }; diff --git a/lib/url.js b/lib/url.js index 0841bb4d7f08a4..c7ac8ff5739b9a 100644 --- a/lib/url.js +++ b/lib/url.js @@ -59,9 +59,7 @@ const { urlToHttpOptions, } = require('internal/url'); -const { - formatUrl, -} = internalBinding('url'); +const bindingUrl = internalBinding('url'); const { getOptionValue } = require('internal/options'); @@ -627,7 +625,7 @@ function urlFormat(urlObject, options) { } } - return formatUrl(urlObject.href, fragment, unicode, search, auth); + return bindingUrl.format(urlObject.href, fragment, unicode, search, auth); } return Url.prototype.format.call(urlObject); diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 94b16744ee9a79..bfa048a4a8aa18 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -18,6 +18,7 @@ #include "node_metadata.h" #include "node_process.h" #include "node_snapshot_builder.h" +#include "node_url.h" #include "node_util.h" #include "node_v8.h" #include "node_v8_platform-inl.h" diff --git a/src/node_snapshotable.h b/src/node_snapshotable.h index 3f4d0780131e20..190910ced83eed 100644 --- a/src/node_snapshotable.h +++ b/src/node_snapshotable.h @@ -27,6 +27,7 @@ struct PropInfo { V(v8_binding_data, v8_utils::BindingData) \ V(blob_binding_data, BlobBindingData) \ V(process_binding_data, process::BindingData) \ + V(url_binding_data, url::BindingData) \ V(util_weak_reference, util::WeakReference) enum class EmbedderObjectType : uint8_t { diff --git a/src/node_url.cc b/src/node_url.cc index 35476433cd8361..551e726145f06b 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -5,14 +5,16 @@ #include "node_external_reference.h" #include "node_i18n.h" #include "util-inl.h" +#include "v8.h" +#include #include #include namespace node { +namespace url { using v8::Context; -using v8::Function; using v8::FunctionCallbackInfo; using v8::HandleScope; using v8::Isolate; @@ -22,102 +24,49 @@ using v8::Object; using v8::String; using v8::Value; -namespace url { -namespace { - -enum url_update_action { - kProtocol = 0, - kHost = 1, - kHostname = 2, - kPort = 3, - kUsername = 4, - kPassword = 5, - kPathname = 6, - kSearch = 7, - kHash = 8, - kHref = 9, -}; - -auto GetCallbackArgs(Environment* env, const ada::result& url) { - Local context = env->context(); - Isolate* isolate = env->isolate(); - - auto js_string = [&](std::string_view sv) { - return ToV8Value(context, sv, isolate).ToLocalChecked(); - }; - return std::array{ - js_string(url->get_href()), - js_string(url->get_origin()), - js_string(url->get_protocol()), - js_string(url->get_hostname()), - js_string(url->get_pathname()), - js_string(url->get_search()), - js_string(url->get_username()), - js_string(url->get_password()), - js_string(url->get_port()), - js_string(url->get_hash()), - }; +void BindingData::MemoryInfo(MemoryTracker* tracker) const { + tracker->TrackField("url_components_buffer", url_components_buffer_); } -void Parse(const FunctionCallbackInfo& args) { - CHECK_GE(args.Length(), 3); - CHECK(args[0]->IsString()); // input - // args[1] // base url - CHECK(args[2]->IsFunction()); // complete callback - - Local success_callback_ = args[2].As(); - - Environment* env = Environment::GetCurrent(args); - HandleScope handle_scope(env->isolate()); - Context::Scope context_scope(env->context()); - - Utf8Value input(env->isolate(), args[0]); - ada::result base; - ada::url* base_pointer = nullptr; - if (args[1]->IsString()) { - base = ada::parse(Utf8Value(env->isolate(), args[1]).ToString()); - if (!base) { - return args.GetReturnValue().Set(false); - } - base_pointer = &base.value(); - } - ada::result out = ada::parse(input.ToStringView(), base_pointer); - - if (!out) { - return args.GetReturnValue().Set(false); - } - - auto argv = GetCallbackArgs(env, out); - USE(success_callback_->Call( - env->context(), args.This(), argv.size(), argv.data())); - args.GetReturnValue().Set(true); +BindingData::BindingData(Realm* realm, v8::Local object) + : SnapshotableObject(realm, object, type_int), + url_components_buffer_(realm->isolate(), kURLComponentsLength) { + object + ->Set(realm->context(), + FIXED_ONE_BYTE_STRING(realm->isolate(), "urlComponents"), + url_components_buffer_.GetJSArray()) + .Check(); } -void CanParse(const FunctionCallbackInfo& args) { - CHECK_GE(args.Length(), 2); - CHECK(args[0]->IsString()); // input - // args[1] // base url - - Environment* env = Environment::GetCurrent(args); - HandleScope handle_scope(env->isolate()); - Context::Scope context_scope(env->context()); +bool BindingData::PrepareForSerialization(v8::Local context, + v8::SnapshotCreator* creator) { + // We'll just re-initialize the buffers in the constructor since their + // contents can be thrown away once consumed in the previous call. + url_components_buffer_.Release(); + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; +} - Utf8Value input(env->isolate(), args[0]); - ada::result base; - ada::url* base_pointer = nullptr; - if (args[1]->IsString()) { - base = ada::parse(Utf8Value(env->isolate(), args[1]).ToString()); - if (!base) { - return args.GetReturnValue().Set(false); - } - base_pointer = &base.value(); - } - ada::result out = ada::parse(input.ToStringView(), base_pointer); +InternalFieldInfoBase* BindingData::Serialize(int index) { + DCHECK_EQ(index, BaseObject::kEmbedderType); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); + return info; +} - args.GetReturnValue().Set(out.has_value()); +void BindingData::Deserialize(v8::Local context, + v8::Local holder, + int index, + InternalFieldInfoBase* info) { + DCHECK_EQ(index, BaseObject::kEmbedderType); + v8::HandleScope scope(context->GetIsolate()); + Realm* realm = Realm::GetCurrent(context); + BindingData* binding = realm->AddBindingData(context, holder); + CHECK_NOT_NULL(binding); } -void DomainToASCII(const FunctionCallbackInfo& args) { +void BindingData::DomainToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); @@ -127,11 +76,10 @@ void DomainToASCII(const FunctionCallbackInfo& args) { return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); } -#if defined(NODE_HAVE_I18N_SUPPORT) // It is important to have an initial value that contains a special scheme. // Since it will change the implementation of `set_hostname` according to URL // spec. - ada::result out = ada::parse("ws://x"); + auto out = ada::parse("ws://x"); DCHECK(out); if (!out->set_hostname(input)) { return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); @@ -139,53 +87,144 @@ void DomainToASCII(const FunctionCallbackInfo& args) { std::string host = out->get_hostname(); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), host.c_str()).ToLocalChecked()); -#else - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); -#endif } -void DomainToUnicode(const FunctionCallbackInfo& args) { +void BindingData::DomainToUnicode(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); std::string input = Utf8Value(env->isolate(), args[0]).ToString(); -#if defined(NODE_HAVE_I18N_SUPPORT) // It is important to have an initial value that contains a special scheme. // Since it will change the implementation of `set_hostname` according to URL // spec. - ada::result out = ada::parse("ws://x"); + auto out = ada::parse("ws://x"); DCHECK(out); if (!out->set_hostname(input)) { return args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); } - std::string host = out->get_hostname(); + std::string result = ada::unicode::to_unicode(out->get_hostname()); - MaybeStackBuffer buf; - int32_t len = i18n::ToUnicode(&buf, host.data(), host.length()); + args.GetReturnValue().Set(String::NewFromUtf8(env->isolate(), + result.c_str(), + NewStringType::kNormal, + result.length()) + .ToLocalChecked()); +} - if (len < 0) { - return args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); +// TODO(@anonrig): Add V8 Fast API for CanParse method +void BindingData::CanParse(const FunctionCallbackInfo& args) { + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); // input + // args[1] // base url + + Environment* env = Environment::GetCurrent(args); + HandleScope handle_scope(env->isolate()); + Context::Scope context_scope(env->context()); + + Utf8Value input(env->isolate(), args[0]); + ada::result base; + ada::url_aggregator* base_pointer = nullptr; + if (args[1]->IsString()) { + base = ada::parse( + Utf8Value(env->isolate(), args[1]).ToString()); + if (!base) { + return args.GetReturnValue().Set(false); + } + base_pointer = &base.value(); + } + auto out = + ada::parse(input.ToStringView(), base_pointer); + + args.GetReturnValue().Set(out.has_value()); +} + +void BindingData::Format(const FunctionCallbackInfo& args) { + CHECK_GT(args.Length(), 4); + CHECK(args[0]->IsString()); // url href + + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + + Utf8Value href(isolate, args[0].As()); + const bool fragment = args[1]->IsTrue(); + const bool unicode = args[2]->IsTrue(); + const bool search = args[3]->IsTrue(); + const bool auth = args[4]->IsTrue(); + + // ada::url provides a faster alternative to ada::url_aggregator if we + // directly want to manipulate the url components without using the respective + // setters. therefore we are using ada::url here. + auto out = ada::parse(href.ToStringView()); + CHECK(out); + + if (!fragment) { + out->fragment = std::nullopt; } + if (unicode) { + out->host = ada::idna::to_unicode(out->get_hostname()); + } + + if (!search) { + out->query = std::nullopt; + } + + if (!auth) { + out->username = ""; + out->password = ""; + } + + std::string result = out->get_href(); + args.GetReturnValue().Set(String::NewFromUtf8(env->isolate(), + result.data(), + NewStringType::kNormal, + result.length()) + .ToLocalChecked()); +} + +void BindingData::Parse(const FunctionCallbackInfo& args) { + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); // input + // args[1] // base url + + BindingData* binding_data = Realm::GetBindingData(args); + Environment* env = Environment::GetCurrent(args); + HandleScope handle_scope(env->isolate()); + Context::Scope context_scope(env->context()); + + Utf8Value input(env->isolate(), args[0]); + ada::result base; + ada::url_aggregator* base_pointer = nullptr; + if (args[1]->IsString()) { + base = ada::parse( + Utf8Value(env->isolate(), args[1]).ToString()); + if (!base) { + return args.GetReturnValue().Set(false); + } + base_pointer = &base.value(); + } + auto out = + ada::parse(input.ToStringView(), base_pointer); + + if (!out) { + return args.GetReturnValue().Set(false); + } + + binding_data->UpdateComponents(out->get_components(), out->type); + args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), *buf, NewStringType::kNormal, len) + ToV8Value(env->context(), out->get_href(), env->isolate()) .ToLocalChecked()); -#else // !defined(NODE_HAVE_I18N_SUPPORT) - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); -#endif } -void UpdateUrl(const FunctionCallbackInfo& args) { +void BindingData::Update(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); // href CHECK(args[1]->IsNumber()); // action type CHECK(args[2]->IsString()); // new value - CHECK(args[3]->IsFunction()); // success callback + BindingData* binding_data = Realm::GetBindingData(args); Environment* env = Environment::GetCurrent(args); Isolate* isolate = env->isolate(); @@ -193,10 +232,9 @@ void UpdateUrl(const FunctionCallbackInfo& args) { args[1]->Uint32Value(env->context()).FromJust()); Utf8Value input(isolate, args[0].As()); Utf8Value new_value(isolate, args[2].As()); - Local success_callback_ = args[3].As(); std::string_view new_value_view = new_value.ToStringView(); - ada::result out = ada::parse(input.ToStringView()); + auto out = ada::parse(input.ToStringView()); CHECK(out); bool result{true}; @@ -242,89 +280,60 @@ void UpdateUrl(const FunctionCallbackInfo& args) { result = out->set_username(new_value_view); break; } + default: + UNREACHABLE("Unsupported URL update action"); } - auto argv = GetCallbackArgs(env, out); - USE(success_callback_->Call( - env->context(), args.This(), argv.size(), argv.data())); - args.GetReturnValue().Set(result); -} - -void FormatUrl(const FunctionCallbackInfo& args) { - CHECK_GT(args.Length(), 4); - CHECK(args[0]->IsString()); // url href - - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - - Utf8Value href(isolate, args[0].As()); - const bool fragment = args[1]->IsTrue(); - const bool unicode = args[2]->IsTrue(); - const bool search = args[3]->IsTrue(); - const bool auth = args[4]->IsTrue(); - - ada::result out = ada::parse(href.ToStringView()); - CHECK(out); - - if (!fragment) { - out->fragment = std::nullopt; - } - - if (unicode) { -#if defined(NODE_HAVE_I18N_SUPPORT) - std::string hostname = out->get_hostname(); - MaybeStackBuffer buf; - int32_t len = i18n::ToUnicode(&buf, hostname.data(), hostname.length()); - - if (len < 0) { - out->host = ""; - } else { - out->host = buf.ToString(); - } -#else - out->host = ""; -#endif - } - - if (!search) { - out->query = std::nullopt; + if (!result) { + return args.GetReturnValue().Set(false); } - if (!auth) { - out->username = ""; - out->password = ""; - } + binding_data->UpdateComponents(out->get_components(), out->type); + args.GetReturnValue().Set( + ToV8Value(env->context(), out->get_href(), env->isolate()) + .ToLocalChecked()); +} - std::string result = out->get_href(); - args.GetReturnValue().Set(String::NewFromUtf8(env->isolate(), - result.data(), - NewStringType::kNormal, - result.length()) - .ToLocalChecked()); +void BindingData::UpdateComponents(const ada::url_components& components, + const ada::scheme::type type) { + url_components_buffer_[0] = components.protocol_end; + url_components_buffer_[1] = components.username_end; + url_components_buffer_[2] = components.host_start; + url_components_buffer_[3] = components.host_end; + url_components_buffer_[4] = components.port; + url_components_buffer_[5] = components.pathname_start; + url_components_buffer_[6] = components.search_start; + url_components_buffer_[7] = components.hash_start; + url_components_buffer_[8] = type; + static_assert(kURLComponentsLength == 9, + "kURLComponentsLength should be up-to-date"); } -void Initialize(Local target, - Local unused, - Local context, - void* priv) { - SetMethod(context, target, "parse", Parse); - SetMethod(context, target, "updateUrl", UpdateUrl); - SetMethodNoSideEffect(context, target, "canParse", CanParse); - SetMethodNoSideEffect(context, target, "formatUrl", FormatUrl); +void BindingData::Initialize(Local target, + Local unused, + Local context, + void* priv) { + Realm* realm = Realm::GetCurrent(context); + BindingData* const binding_data = + realm->AddBindingData(context, target); + if (binding_data == nullptr) return; SetMethodNoSideEffect(context, target, "domainToASCII", DomainToASCII); SetMethodNoSideEffect(context, target, "domainToUnicode", DomainToUnicode); + SetMethodNoSideEffect(context, target, "canParse", CanParse); + SetMethodNoSideEffect(context, target, "format", Format); + SetMethod(context, target, "parse", Parse); + SetMethod(context, target, "update", Update); } -} // namespace - -void RegisterExternalReferences(ExternalReferenceRegistry* registry) { - registry->Register(Parse); - registry->Register(CanParse); - registry->Register(UpdateUrl); - registry->Register(FormatUrl); +void BindingData::RegisterExternalReferences( + ExternalReferenceRegistry* registry) { registry->Register(DomainToASCII); registry->Register(DomainToUnicode); + registry->Register(CanParse); + registry->Register(Format); + registry->Register(Parse); + registry->Register(Update); } std::string FromFilePath(const std::string_view file_path) { @@ -338,7 +347,9 @@ std::string FromFilePath(const std::string_view file_path) { } } // namespace url + } // namespace node -NODE_BINDING_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize) -NODE_BINDING_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences) +NODE_BINDING_CONTEXT_AWARE_INTERNAL(url, node::url::BindingData::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE( + url, node::url::BindingData::RegisterExternalReferences) diff --git a/src/node_url.h b/src/node_url.h index c3d895d2f6092f..4630c426c29d67 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -3,18 +3,74 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS +#include #include "ada.h" +#include "aliased_buffer.h" #include "node.h" +#include "node_snapshotable.h" #include "util.h" #include namespace node { +class ExternalReferenceRegistry; + namespace url { +enum url_update_action { + kProtocol = 0, + kHost = 1, + kHostname = 2, + kPort = 3, + kUsername = 4, + kPassword = 5, + kPathname = 6, + kSearch = 7, + kHash = 8, + kHref = 9, +}; + +class BindingData : public SnapshotableObject { + public: + explicit BindingData(Realm* realm, v8::Local obj); + + using InternalFieldInfo = InternalFieldInfoBase; + + SERIALIZABLE_OBJECT_METHODS() + static constexpr FastStringKey type_name{"node::url::BindingData"}; + static constexpr EmbedderObjectType type_int = + EmbedderObjectType::k_url_binding_data; + + void MemoryInfo(MemoryTracker* tracker) const override; + SET_SELF_SIZE(BindingData) + SET_MEMORY_INFO_NAME(BindingData) + + static void DomainToASCII(const v8::FunctionCallbackInfo& args); + static void DomainToUnicode(const v8::FunctionCallbackInfo& args); + + static void CanParse(const v8::FunctionCallbackInfo& args); + static void Format(const v8::FunctionCallbackInfo& args); + static void Parse(const v8::FunctionCallbackInfo& args); + static void Update(const v8::FunctionCallbackInfo& args); + + static void Initialize(v8::Local target, + v8::Local unused, + v8::Local context, + void* priv); + static void RegisterExternalReferences(ExternalReferenceRegistry* registry); + + private: + static constexpr size_t kURLComponentsLength = 9; + AliasedUint32Array url_components_buffer_; + + void UpdateComponents(const ada::url_components& components, + const ada::scheme::type type); +}; + std::string FromFilePath(const std::string_view file_path); } // namespace url + } // namespace node #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS diff --git a/test/parallel/test-whatwg-url-custom-inspect.js b/test/parallel/test-whatwg-url-custom-inspect.js index a7d30a6ab936c3..f7a690a4909e81 100644 --- a/test/parallel/test-whatwg-url-custom-inspect.js +++ b/test/parallel/test-whatwg-url-custom-inspect.js @@ -47,15 +47,18 @@ assert.strictEqual( hash: '#hash', [Symbol(context)]: URLContext { href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', - origin: 'https://host.name:8080', - protocol: 'https:', - hostname: 'host.name', - pathname: '/path/name/', - search: '?que=ry', - username: 'username', - password: 'password', - port: '8080', - hash: '#hash' + protocol_end: 6, + username_end: 16, + host_start: 25, + host_end: 35, + pathname_start: 40, + search_start: 51, + hash_start: 58, + port: 8080, + scheme_type: 2, + [hasPort]: [Getter], + [hasSearch]: [Getter], + [hasHash]: [Getter] } }`); diff --git a/tsconfig.json b/tsconfig.json index 791bc8ac6387a1..f42ed9ad0eac67 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -15,6 +15,7 @@ "./typings/internalBinding/symbols.d.ts", "./typings/internalBinding/timers.d.ts", "./typings/internalBinding/types.d.ts", + "./typings/internalBinding/url.d.ts", "./typings/internalBinding/util.d.ts", "./typings/internalBinding/worker.d.ts", "./typings/globals.d.ts", diff --git a/typings/internalBinding/url.d.ts b/typings/internalBinding/url.d.ts new file mode 100644 index 00000000000000..54d1cb1f93d790 --- /dev/null +++ b/typings/internalBinding/url.d.ts @@ -0,0 +1,12 @@ +import type { urlUpdateActions } from 'internal/url' + +declare function InternalBinding(binding: 'url'): { + urlComponents: Uint32Array; + + domainToASCII(input: string): string; + domainToUnicode(input: string): string; + canParse(input: string, base?: string): boolean; + format(input: string, fragment?: boolean, unicode?: boolean, search?: boolean, auth?: boolean): string; + parse(input: string, base?: string): string | false; + update(input: string, actionType: typeof urlUpdateActions, value: string): string | false; +};