diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index d6132d4c34e..c2afde71046 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -534,3 +534,9 @@ wd_test( "//conditions:default": ["@platforms//:incompatible"], }), ) + +wd_test( + src = "tests/new-module-registry-test.wd-test", + args = ["--experimental"], + data = ["tests/new-module-registry-test.js"], +) diff --git a/src/workerd/api/tests/new-module-registry-test.js b/src/workerd/api/tests/new-module-registry-test.js new file mode 100644 index 00000000000..d271a41b6e4 --- /dev/null +++ b/src/workerd/api/tests/new-module-registry-test.js @@ -0,0 +1,176 @@ +import { + notStrictEqual, + ok, + rejects, + strictEqual, + deepStrictEqual, +} from 'assert'; +import { foo, default as def } from 'foo'; +import { default as fs } from 'node:fs'; +import { Buffer } from 'buffer'; +const { foo: foo2, default: def2 } = await import('bar'); + +// Verify that import.meta.url is correct here. +strictEqual(import.meta.url, 'file:///worker'); + +// Verify that import.meta.main is true here. +ok(import.meta.main); + +// Verify that import.meta.resolve provides correct results here. +// The input should be interpreted as a URL and normalized according +// to the rules in the WHATWG URL specification. +strictEqual(import.meta.resolve('./.././test/.././../foo'), 'file:///foo'); + +// There are four tests at this top level... one for the import of the node:assert +// module without the node: prefix specifier, two for the imports of the foo and +// bar modules from the worker, and one for the aliases node:fs module from the +// module worker. + +strictEqual(foo, 1); +strictEqual(def, 2); +strictEqual(foo2, 1); +strictEqual(def2, 2); +strictEqual(fs, 'abc'); + +// Equivalent to the above, but using the file: URL scheme. +import { foo as foo3, default as def3 } from 'file:///foo'; +strictEqual(foo, foo3); +strictEqual(def, def3); + +import * as text from 'text'; +strictEqual(text.default, 'abc'); + +import * as data from 'data'; +strictEqual(Buffer.from(data.default).toString(), 'abcdef'); + +import * as json from 'json'; +deepStrictEqual(json.default, { foo: 1 }); + +await rejects(import('invalid-json'), { + message: /Unexpected non-whitespace character after JSON/, +}); + +await rejects(import('module-not-found'), { + message: /Module not found: file:\/\/\/module-not-found/, +}); + +// Verify that a module is unable to perform IO operations at the top level, even if +// the dynamic import is initiated within the scope of an active IoContext. +export const noTopLevelIo = { + async test() { + await rejects(import('bad'), { + message: /^Disallowed operation called within global scope/, + }); + }, +}; + +// Verify that async local storage is propagated into dynamic imports. +export const alsPropagationDynamicImport = { + async test() { + const { AsyncLocalStorage } = await import('async_hooks'); + globalThis.als = new AsyncLocalStorage(); + const res = await globalThis.als.run(123, () => import('als')); + strictEqual(res.default, 123); + }, +}; + +// Query strings and fragments create new instances of known modules. +export const queryAndFragment = { + async test() { + // Each resolves the same underlying module but creates a new instance. + // The exports should be the same but the module namespaces should be different. + + const a = await import('foo?query'); + const b = await import('foo#fragment'); + const c = await import('foo?query#fragment'); + const d = await import('foo'); + + strictEqual(a.default, 2); + strictEqual(a.foo, 1); + strictEqual(a.default, b.default); + strictEqual(a.default, c.default); + strictEqual(a.default, d.default); + strictEqual(a.foo, b.foo); + strictEqual(a.foo, c.foo); + strictEqual(a.foo, d.foo); + + notStrictEqual(a, b); + notStrictEqual(a, c); + notStrictEqual(a, d); + notStrictEqual(b, c); + notStrictEqual(b, d); + notStrictEqual(c, d); + + // The import.meta.url for each should match the specifier used to import the instance. + strictEqual(a.bar, 'file:///foo?query'); + strictEqual(b.bar, 'file:///foo#fragment'); + strictEqual(c.bar, 'file:///foo?query#fragment'); + strictEqual(d.bar, 'file:///foo'); + }, +}; + +// We do not currently support import assertions/attributes. Per the recommendation +// in the spec, we throw an error when they are encountered. +export const importAssertionsFail = { + async test() { + await rejects(import('ia'), { + message: /^Import attributes are not supported/, + }); + await rejects(import('foo', { with: { a: 'abc' } }), { + message: /^Import attributes are not supported/, + }); + }, +}; + +export const invalidUrlAsSpecifier = { + async test() { + await rejects(import('zebra: not a \x00 valid URL'), { + message: /Module not found/, + }); + }, +}; + +export const evalErrorsInEsmTopLevel = { + async test() { + await rejects(import('esm-error'), { + message: /boom/, + }); + await rejects(import('esm-error-dynamic'), { + message: /boom/, + }); + }, +}; + +// TODO(now): Tests +// * [ ] Include tests for all known module types +// * [x] ESM +// * [ ] CommonJS +// * [x] Text +// * [x] Data +// * [x] JSON +// * [ ] WASM +// * [ ] Python +// * [x] IO is forbidden in top-level module scope +// * [x] Async local storage context is propagated into dynamic imports +// * [x] Static import correctly handles node: modules with/without the node: prefix +// * [x] Dynamic import correctly handles node: modules with/without the node: prefix +// * [x] Worker bundle can alias node: modules +// * [x] modules not found are correctly reported as errors +// * [x] Errors during ESM evaluation are correctly reported +// * [x] Errors during dynamic import are correctly reported +// * [x] Errors in JSON module parsing are correctly reported +// * [x] Module specifiers are correctly handled as URLs +// * [x] Querys and fragments resolve new instances of known modules +// * [x] URL resolution works correctly +// * [x] Invalid URLs are correctly reported as errors +// * [x] Import assertions should be rejected +// * [ ] require(...) Works in CommonJs Modules +// * [ ] require(...) correctly handles node: modules with/without the node: prefix +// * [ ] Circular dependencies are correctly handled +// * [ ] Errors during CommonJs evaluation are correctly reported +// * [ ] Entry point ESM with no default export is correctly reported as error +// * [ ] CommonJs modules correctly expose named exports +// * [ ] require('module').createRequire API works as expected +// * [ ] Fallback service works as expected +// * [ ] console.log output correctly uses node-internal:inspect for output +// ... diff --git a/src/workerd/api/tests/new-module-registry-test.wd-test b/src/workerd/api/tests/new-module-registry-test.wd-test new file mode 100644 index 00000000000..2a726a55eef --- /dev/null +++ b/src/workerd/api/tests/new-module-registry-test.wd-test @@ -0,0 +1,43 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "new-module-registry-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "new-module-registry-test.js"), + (name = "foo", esModule = "export const foo = 1; export default 2; export const bar = import.meta.url"), + (name = "bar", esModule = "export const foo = 1; export default 2;"), + (name = "node:fs", esModule = "export default 'abc'"), + + # Intentionally bad module to test error handling. + # Evaluation will error because i/o is not permitted at top-level scope. + (name = "bad", esModule = "export default 1; setTimeout(() => {}, 10)"), + + # Ensure that async context is propagated into a dynamic import. + (name = "als", esModule = "export default globalThis.als.getStore()"), + + # Import assertions are not supported currently + (name = "ia", esModule = "import * as def from 'foo' with { a: 'test' }"), + + # Errors on ESM eval should be reported properly in both static and + # dynamic imports. + (name = "esm-error", esModule = "export default 1; throw new Error('boom');"), + (name = "esm-error-dynamic", esModule = "export * as d from 'esm-error'"), + + # Other module types work + (name = "text", text = "abc"), + (name = "data", data = "abcdef"), + (name = "json", json = "{ \"foo\": 1 }"), + (name = "invalid-json", json = "1n"), + ], + compatibilityDate = "2024-07-01", + compatibilityFlags = [ + "nodejs_compat_v2", + "new_module_registry", + "experimental", + ], + ) + ), + ], +); diff --git a/src/workerd/jsg/modules-new.c++ b/src/workerd/jsg/modules-new.c++ index 2831a8ebaa4..3aa519c5097 100644 --- a/src/workerd/jsg/modules-new.c++ +++ b/src/workerd/jsg/modules-new.c++ @@ -649,11 +649,9 @@ v8::MaybeLocal<v8::Promise> dynamicImport(v8::Local<v8::Context> context, // // For now, we do not support any import attributes, so if there are any at all // we will reject the import. - if (!import_assertions.IsEmpty()) { - if (import_assertions->Length() > 0) { - return rejected(js, js.typeError("Import attributes are not supported")); - }; - } + if (!import_assertions.IsEmpty() && import_assertions->Length() > 0) { + return rejected(js, js.typeError("Import attributes are not supported")); + }; Url referrer = ([&] { if (resource_name.IsEmpty()) { @@ -663,6 +661,14 @@ v8::MaybeLocal<v8::Promise> dynamicImport(v8::Local<v8::Context> context, return KJ_ASSERT_NONNULL(Url::tryParse(str.asPtr())); })(); + // If Node.js Compat v2 mode is enable, we have to check to see if the specifier + // is a bare node specifier and resolve it to a full node: URL. + if (isNodeJsCompatEnabled(js)) { + KJ_IF_SOME(nodeSpec, checkNodeSpecifier(spec)) { + spec = kj::mv(nodeSpec); + } + } + KJ_IF_SOME(url, referrer.tryResolve(spec.asPtr())) { auto normalized = url.clone(Url::EquivalenceOption::NORMALIZE_PATH); auto& registry = IsolateModuleRegistry::from(isolate); @@ -691,7 +697,6 @@ IsolateModuleRegistry::IsolateModuleRegistry( auto isolate = js.v8Isolate; auto context = isolate->GetCurrentContext(); KJ_ASSERT(!context.IsEmpty()); - KJ_ASSERT(context->GetAlignedPointerFromEmbedderData(2) == nullptr); context->SetAlignedPointerInEmbedderData(2, this); isolate->SetHostImportModuleDynamicallyCallback(&dynamicImport); isolate->SetHostInitializeImportMetaObjectCallback(&importMeta); @@ -706,76 +711,78 @@ v8::MaybeLocal<v8::Module> resolveCallback(v8::Local<v8::Context> context, auto& registry = IsolateModuleRegistry::from(isolate); auto& js = Lock::from(isolate); - try { - return js.tryCatch([&]() -> v8::MaybeLocal<v8::Module> { - auto spec = kj::str(specifier); - - // The proposed specification for import assertions strongly recommends that - // embedders reject import attributes and types they do not understand/implement. - // This is because import attributes can alter the interpretation of a module. - // Throwing an error for things we do not understand is the safest thing to do - // for backwards compatibility. - // - // For now, we do not support any import attributes, so if there are any at all - // we will reject the import. - if (!import_assertions.IsEmpty()) { - if (import_assertions->Length() > 0) { - js.throwException(js.typeError("Import attributes are not supported")); - }; - } + return js.tryCatch([&]() -> v8::MaybeLocal<v8::Module> { + auto spec = kj::str(specifier); + + // The proposed specification for import assertions strongly recommends that + // embedders reject import attributes and types they do not understand/implement. + // This is because import attributes can alter the interpretation of a module. + // Throwing an error for things we do not understand is the safest thing to do + // for backwards compatibility. + // + // For now, we do not support any import attributes, so if there are any at all + // we will reject the import. + if (!import_assertions.IsEmpty() && import_assertions->Length() > 0) { + js.throwException(js.typeError("Import attributes are not supported")); + } - ResolveContext::Type type = ResolveContext::Type::BUNDLE; + ResolveContext::Type type = ResolveContext::Type::BUNDLE; - auto& referrerUrl = registry.lookup(js, referrer) - .map([&](IsolateModuleRegistry::Entry& entry) -> const Url& { - switch (entry.module.type()) { - case Module::Type::BUNDLE: { - type = ResolveContext::Type::BUNDLE; - break; - } - case Module::Type::BUILTIN: { - type = ResolveContext::Type::BUILTIN; - break; - } - case Module::Type::BUILTIN_ONLY: { - type = ResolveContext::Type::BUILTIN_ONLY; - break; - } - case Module::Type::FALLBACK: { - type = ResolveContext::Type::BUNDLE; - break; - } + auto& referrerUrl = registry.lookup(js, referrer) + .map([&](IsolateModuleRegistry::Entry& entry) -> const Url& { + switch (entry.module.type()) { + case Module::Type::BUNDLE: { + type = ResolveContext::Type::BUNDLE; + break; + } + case Module::Type::BUILTIN: { + type = ResolveContext::Type::BUILTIN; + break; + } + case Module::Type::BUILTIN_ONLY: { + type = ResolveContext::Type::BUILTIN_ONLY; + break; } - return entry.specifier.specifier; - }).orDefault(ModuleBundle::BundleBuilder::BASE); + case Module::Type::FALLBACK: { + type = ResolveContext::Type::BUNDLE; + break; + } + } + return entry.specifier.specifier; + }).orDefault(ModuleBundle::BundleBuilder::BASE); - KJ_IF_SOME(url, referrerUrl.tryResolve(spec)) { - // Make sure that percent-encoding in the path is normalized so we can match correctly. - auto normalized = url.clone(Url::EquivalenceOption::NORMALIZE_PATH); - ResolveContext resolveContext = { - .type = type, - .source = ResolveContext::Source::STATIC_IMPORT, - .specifier = normalized, - .referrer = referrerUrl, - .rawSpecifier = spec.asPtr(), - }; - // TODO(soon): Add import assertions to the context. - - return registry.resolve(js, resolveContext); + // If Node.js Compat v2 mode is enable, we have to check to see if the specifier + // is a bare node specifier and resolve it to a full node: URL. + if (isNodeJsCompatEnabled(js)) { + KJ_IF_SOME(nodeSpec, checkNodeSpecifier(spec)) { + spec = kj::mv(nodeSpec); } + } - js.throwException(js.error(kj::str("Invalid module specifier: "_kj, specifier))); - }, [&](Value exception) -> v8::MaybeLocal<v8::Module> { - // If there are any synchronously thrown exceptions, we want to catch them - // here and convert them into a rejected promise. The only exception are - // fatal cases where the isolate is terminating which won't make it here - // anyway. - js.v8Isolate->ThrowException(exception.getHandle(js)); - return v8::MaybeLocal<v8::Module>(); - }); - } catch (...) { - kj::throwFatalException(kj::getCaughtExceptionAsKj()); - } + KJ_IF_SOME(url, referrerUrl.tryResolve(spec)) { + // Make sure that percent-encoding in the path is normalized so we can match correctly. + auto normalized = url.clone(Url::EquivalenceOption::NORMALIZE_PATH); + ResolveContext resolveContext = { + .type = type, + .source = ResolveContext::Source::STATIC_IMPORT, + .specifier = normalized, + .referrer = referrerUrl, + .rawSpecifier = spec.asPtr(), + }; + // TODO(soon): Add import assertions to the context. + + return registry.resolve(js, resolveContext); + } + + js.throwException(js.error(kj::str("Invalid module specifier: "_kj, specifier))); + }, [&](Value exception) -> v8::MaybeLocal<v8::Module> { + // If there are any synchronously thrown exceptions, we want to catch them + // here and convert them into a rejected promise. The only exception are + // fatal cases where the isolate is terminating which won't make it here + // anyway. + js.v8Isolate->ThrowException(exception.getHandle(js)); + return v8::MaybeLocal<v8::Module>(); + }); } // The fallback module bundle calls a single resolve callback to resolve all modules diff --git a/src/workerd/jsg/modules.c++ b/src/workerd/jsg/modules.c++ index 92755f5b69a..9d5238cac20 100644 --- a/src/workerd/jsg/modules.c++ +++ b/src/workerd/jsg/modules.c++ @@ -11,20 +11,6 @@ namespace workerd::jsg { namespace { -// This list must be kept in sync with the list of builtins from Node.js. -// It should be unlikely that anything is ever removed from this list, and -// adding items to it is considered a semver-major change in Node.js. -static const std::set<kj::StringPtr> NODEJS_BUILTINS{"_http_agent", "_http_client", "_http_common", - "_http_incoming", "_http_outgoing", "_http_server", "_stream_duplex", "_stream_passthrough", - "_stream_readable", "_stream_transform", "_stream_wrap", "_stream_writable", "_tls_common", - "_tls_wrap", "assert", "assert/strict", "async_hooks", "buffer", "child_process", "cluster", - "console", "constants", "crypto", "dgram", "diagnostics_channel", "dns", "dns/promises", "domain", - "events", "fs", "fs/promises", "http", "http2", "https", "inspector", "inspector/promises", - "module", "net", "os", "path", "path/posix", "path/win32", "perf_hooks", "process", "punycode", - "querystring", "readline", "readline/promises", "repl", "stream", "stream/consumers", - "stream/promises", "stream/web", "string_decoder", "sys", "timers", "timers/promises", "tls", - "trace_events", "tty", "url", "util", "util/types", "v8", "vm", "wasi", "worker_threads", "zlib"}; - // The CompileCache is used to hold cached compilation data for built-in JavaScript modules. // // Importantly, this is a process-lifetime in-memory cache that is only appropriate for @@ -278,19 +264,6 @@ v8::MaybeLocal<v8::Value> evaluateSyntheticModuleCallback( } // namespace -kj::Maybe<kj::String> checkNodeSpecifier(kj::StringPtr specifier) { - if (NODEJS_BUILTINS.contains(specifier)) { - return kj::str("node:", specifier); - } else if (specifier.startsWith("node:")) { - return kj::str(specifier); - } - return kj::none; -} - -bool isNodeJsCompatEnabled(jsg::Lock& js) { - return IsolateBase::from(js.v8Isolate).isNodeJsCompatEnabled(); -} - ModuleRegistry* getModulesForResolveCallback(v8::Isolate* isolate) { return static_cast<ModuleRegistry*>( isolate->GetCurrentContext()->GetAlignedPointerFromEmbedderData(2)); diff --git a/src/workerd/jsg/modules.h b/src/workerd/jsg/modules.h index f9373841872..7fe7a3318e2 100644 --- a/src/workerd/jsg/modules.h +++ b/src/workerd/jsg/modules.h @@ -14,9 +14,6 @@ namespace workerd::jsg { -kj::Maybe<kj::String> checkNodeSpecifier(kj::StringPtr specifier); -bool isNodeJsCompatEnabled(jsg::Lock& js); - class CommonJsModuleContext; class CommonJsModuleObject: public jsg::Object { diff --git a/src/workerd/jsg/util.c++ b/src/workerd/jsg/util.c++ index 7210e42c72f..96e1be6a444 100644 --- a/src/workerd/jsg/util.c++ +++ b/src/workerd/jsg/util.c++ @@ -7,6 +7,7 @@ #include "ser.h" #include <kj/debug.h> #include <cstdlib> +#include <set> #if !_WIN32 #include <cxxabi.h> @@ -677,4 +678,39 @@ v8::Local<v8::String> newExternalTwoByteString(Lock& js, kj::ArrayPtr<const uint return check(ExternTwoByteString::createExtern(js.v8Isolate, buf)); } +// ====================================================================================== +// Node.js Compat + +namespace { +// This list must be kept in sync with the list of builtins from Node.js. +// It should be unlikely that anything is ever removed from this list, and +// adding items to it is considered a semver-major change in Node.js. +static const std::set<kj::StringPtr> NODEJS_BUILTINS{"_http_agent"_kj, "_http_client"_kj, + "_http_common"_kj, "_http_incoming"_kj, "_http_outgoing"_kj, "_http_server"_kj, + "_stream_duplex"_kj, "_stream_passthrough"_kj, "_stream_readable"_kj, "_stream_transform"_kj, + "_stream_wrap"_kj, "_stream_writable"_kj, "_tls_common"_kj, "_tls_wrap"_kj, "assert"_kj, + "assert/strict"_kj, "async_hooks"_kj, "buffer"_kj, "child_process"_kj, "cluster"_kj, "console"_kj, + "constants"_kj, "crypto"_kj, "dgram"_kj, "diagnostics_channel"_kj, "dns"_kj, "dns/promises"_kj, + "domain"_kj, "events"_kj, "fs"_kj, "fs/promises"_kj, "http"_kj, "http2"_kj, "https"_kj, + "inspector"_kj, "inspector/promises"_kj, "module"_kj, "net"_kj, "os"_kj, "path"_kj, + "path/posix"_kj, "path/win32"_kj, "perf_hooks"_kj, "process"_kj, "punycode"_kj, "querystring"_kj, + "readline"_kj, "readline/promises"_kj, "repl"_kj, "stream"_kj, "stream/consumers"_kj, + "stream/promises"_kj, "stream/web"_kj, "string_decoder"_kj, "sys"_kj, "timers"_kj, + "timers/promises"_kj, "tls"_kj, "trace_events"_kj, "tty"_kj, "url"_kj, "util"_kj, "util/types"_kj, + "v8"_kj, "vm"_kj, "wasi"_kj, "worker_threads"_kj, "zlib"_kj}; +} // namespace + +kj::Maybe<kj::String> checkNodeSpecifier(kj::StringPtr specifier) { + if (NODEJS_BUILTINS.contains(specifier)) { + return kj::str("node:", specifier); + } else if (specifier.startsWith("node:")) { + return kj::str(specifier); + } + return kj::none; +} + +bool isNodeJsCompatEnabled(jsg::Lock& js) { + return IsolateBase::from(js.v8Isolate).isNodeJsCompatEnabled(); +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/util.h b/src/workerd/jsg/util.h index b40fdf9bcc1..847fccb66f1 100644 --- a/src/workerd/jsg/util.h +++ b/src/workerd/jsg/util.h @@ -476,4 +476,10 @@ struct Unimplemented {}; // standard ServiceWorker APIs that don't make sense for Workers. using WontImplement = Unimplemented; +// ====================================================================================== +// Node.js Compat + +kj::Maybe<kj::String> checkNodeSpecifier(kj::StringPtr specifier); +bool isNodeJsCompatEnabled(jsg::Lock& js); + } // namespace workerd::jsg