From 5fada36da4bedfe1901e14fb45feddb143371883 Mon Sep 17 00:00:00 2001 From: Dominik Picheta Date: Tue, 19 Nov 2024 17:04:49 +0000 Subject: [PATCH] Implements support for external package loading in validator. --- .bazelrc | 1 + src/cloudflare/internal/test/ai/BUILD.bazel | 4 +++ src/cloudflare/internal/test/d1/BUILD.bazel | 4 +++ .../internal/test/vectorize/BUILD.bazel | 4 +++ src/pyodide/internal/python.ts | 16 ++++++++-- src/pyodide/internal/setupPackages.ts | 7 ++++- src/pyodide/internal/snapshot.ts | 7 +++-- src/pyodide/internal/tar.ts | 4 +-- src/pyodide/python-entrypoint-helper.ts | 31 +++---------------- src/pyodide/types/FS.d.ts | 2 +- src/workerd/api/pyodide/pyodide.c++ | 5 ++- src/workerd/api/pyodide/pyodide.h | 2 +- src/workerd/server/workerd-api.c++ | 1 + 13 files changed, 48 insertions(+), 40 deletions(-) diff --git a/.bazelrc b/.bazelrc index 2d5b0a9ed36..9df5288d047 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,6 +2,7 @@ common --enable_platform_specific_config build --verbose_failures build --build_tag_filters=-off-by-default test --test_tag_filters=-off-by-default +test:asan --test_tag_filters=-off-by-default,-no-asan # exclude enormous tests by default build --test_size_filters=-enormous diff --git a/src/cloudflare/internal/test/ai/BUILD.bazel b/src/cloudflare/internal/test/ai/BUILD.bazel index d2219a1e1ac..085d8be288e 100644 --- a/src/cloudflare/internal/test/ai/BUILD.bazel +++ b/src/cloudflare/internal/test/ai/BUILD.bazel @@ -15,4 +15,8 @@ py_wd_test( "*.js", "*.py", ]), + tags = [ + # TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318 + "no-asan", + ], ) diff --git a/src/cloudflare/internal/test/d1/BUILD.bazel b/src/cloudflare/internal/test/d1/BUILD.bazel index d6fa0389ce6..461decb6923 100644 --- a/src/cloudflare/internal/test/d1/BUILD.bazel +++ b/src/cloudflare/internal/test/d1/BUILD.bazel @@ -24,4 +24,8 @@ py_wd_test( "*.py", "*.js", ]), + tags = [ + # TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318 + "no-asan", + ], ) diff --git a/src/cloudflare/internal/test/vectorize/BUILD.bazel b/src/cloudflare/internal/test/vectorize/BUILD.bazel index 28292c96981..e83c8688bc0 100644 --- a/src/cloudflare/internal/test/vectorize/BUILD.bazel +++ b/src/cloudflare/internal/test/vectorize/BUILD.bazel @@ -13,4 +13,8 @@ py_wd_test( "*.py", "*.js", ]), + tags = [ + # TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318 + "no-asan", + ], ) diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index b036b5f2886..96824c79079 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -1,6 +1,7 @@ import { enterJaegerSpan } from 'pyodide-internal:jaeger'; import { SITE_PACKAGES, + TRANSITIVE_REQUIREMENTS, adjustSysPath, mountSitePackages, mountWorkerFiles, @@ -50,6 +51,7 @@ import { setUnsafeEval, setGetRandomValues, } from 'pyodide-internal:generated/emscriptenSetup'; +import { loadPackages } from 'pyodide-internal:loadPackage'; /** * After running `instantiateEmscriptenModule` but before calling into any C @@ -59,9 +61,8 @@ import { */ async function prepareWasmLinearMemory(Module: Module): Promise { // Note: if we are restoring from a snapshot, runtime is not initialized yet. - mountSitePackages(Module, SITE_PACKAGES.rootInfo); - entropyMountFiles(Module); Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT; + enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module)); enterJaegerSpan('remove_run_dependency', () => Module.removeRunDependency('dynlibs') @@ -100,9 +101,19 @@ export async function loadPyodide( } setUnsafeEval(UnsafeEval); setGetRandomValues(getRandomValues); + + mountSitePackages(Module, SITE_PACKAGES.rootInfo); + entropyMountFiles(Module); + await enterJaegerSpan('load_packages', () => + // NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in + // preloadDynamicLibs. + loadPackages(Module, TRANSITIVE_REQUIREMENTS) + ); + await enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); + maybeSetupSnapshotUpload(Module); // Mount worker files after doing snapshot upload so we ensure that data from the files is never // present in snapshot memory. @@ -111,6 +122,7 @@ export async function loadPyodide( // Finish setting up Pyodide's ffi so we can use the nice Python interface await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap); const pyodide = Module.API.public_api; + finishSnapshotSetup(pyodide); return pyodide; } diff --git a/src/pyodide/internal/setupPackages.ts b/src/pyodide/internal/setupPackages.ts index ce8d47c459a..775b2150630 100644 --- a/src/pyodide/internal/setupPackages.ts +++ b/src/pyodide/internal/setupPackages.ts @@ -8,6 +8,7 @@ import { LOAD_WHEELS_FROM_ARTIFACT_BUNDLER, } from 'pyodide-internal:metadata'; import { simpleRunPython } from 'pyodide-internal:util'; +import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader'; const canonicalizeNameRegex = /[-_.]+/g; @@ -44,6 +45,7 @@ class SitePackagesDir { path: '', name: '', parts: [], + reader: null, }; this.soFiles = []; this.loadedRequirements = new Set(); @@ -125,9 +127,11 @@ class SitePackagesDir { * * This also returns the list of soFiles in the resulting site-packages * directory so we can preload them. + * + * TODO(later): This needs to be removed when external package loading is enabled. */ export function buildSitePackages(requirements: Set): SitePackagesDir { - const [bigTarInfo, bigTarSoFiles] = parseTarInfo(); + const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader); let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]); @@ -171,6 +175,7 @@ function disabledLoadPackage(): never { function getTransitiveRequirements(): Set { const requirements = REQUIREMENTS.map(canonicalizePackageName); // resolve transitive dependencies of requirements and if IN_WORKERD install them from the cdn. + // TODO(later): use current package's LOCKFILE instead of the global. const packageDatas = recursiveDependencies(LOCKFILE, requirements); return new Set(packageDatas.map(({ name }) => canonicalizePackageName(name))); } diff --git a/src/pyodide/internal/snapshot.ts b/src/pyodide/internal/snapshot.ts index 5b93c616bc7..b6cbd90b3af 100644 --- a/src/pyodide/internal/snapshot.ts +++ b/src/pyodide/internal/snapshot.ts @@ -5,7 +5,7 @@ import { SITE_PACKAGES, getSitePackagesPath, } from 'pyodide-internal:setupPackages'; -import { default as TarReader } from 'pyodide-internal:packages_tar_reader'; +import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader'; import { SHOULD_SNAPSHOT_TO_DISK, IS_CREATING_BASELINE_SNAPSHOT, @@ -136,7 +136,10 @@ export function preloadDynamicLibs(Module: Module): void { throw Error('contentsOffset not defined for ' + soFile); } const wasmModuleData = new Uint8Array(size); - TarReader.read(contentsOffset, wasmModuleData); + (node.reader ?? EmbeddedPackagesTarReader).read( + contentsOffset, + wasmModuleData + ); const path = sitePackages + '/' + soFile.join('/'); PRELOADED_SO_FILES.push(path); loadDynlib(Module, path, wasmModuleData); diff --git a/src/pyodide/internal/tar.ts b/src/pyodide/internal/tar.ts index 05dd754234b..3429cddc397 100644 --- a/src/pyodide/internal/tar.ts +++ b/src/pyodide/internal/tar.ts @@ -1,5 +1,3 @@ -import { default as TarReader } from 'pyodide-internal:packages_tar_reader'; - // This is based on the info about the tar file format on wikipedia // And some trial and error with real tar files. // https://en.wikipedia.org/wiki/Tar_(computing)#File_format @@ -44,7 +42,7 @@ function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo { }; } -export function parseTarInfo(reader = TarReader): [TarFSInfo, string[]] { +export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] { const directories: TarFSInfo[] = []; const soFiles = []; const root: TarFSInfo = { diff --git a/src/pyodide/python-entrypoint-helper.ts b/src/pyodide/python-entrypoint-helper.ts index 188613106d4..121b889472e 100644 --- a/src/pyodide/python-entrypoint-helper.ts +++ b/src/pyodide/python-entrypoint-helper.ts @@ -21,7 +21,6 @@ import { import { reportError } from 'pyodide-internal:util'; import { default as Limiter } from 'pyodide-internal:limiter'; import { entropyBeforeRequest } from 'pyodide-internal:topLevelEntropy/lib'; -import { loadPackages } from 'pyodide-internal:loadPackage'; function pyimportMainModule(pyodide: Pyodide): PyModule { if (!MAIN_MODULE_NAME.endsWith('.py')) { @@ -74,21 +73,10 @@ async function applyPatch(pyodide: Pyodide, patchName: string): Promise { pyodide.pyimport(patchName + '_patch'); } -/** - * Set up Python packages: - * - patch loadPackage to ignore integrity - * - get requirements - * - Use tar file + requirements to mount site packages directory - * - if in workerd use loadPackage to load packages - * - install patches to make various requests packages work - * - * TODO: move this into setupPackages.js. Can't now because the patch imports - * fail from there for some reason. - */ -export async function setupPackages(pyodide: Pyodide): Promise { - return await enterJaegerSpan('setup_packages', async () => { +async function setupPatches(pyodide: Pyodide): Promise { + return await enterJaegerSpan('setup_patches', async () => { patchLoadPackage(pyodide); - await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS); + // install any extra packages into the site-packages directory, so calculate where that is. const pymajor = pyodide._module._py_version_major(); const pyminor = pyodide._module._py_version_minor(); @@ -119,7 +107,7 @@ function getMainModule(): Promise { } mainModulePromise = (async function () { const pyodide = await getPyodide(); - await setupPackages(pyodide); + await setupPatches(pyodide); Limiter.beginStartup(); try { return enterJaegerSpan('pyimport_main_module', () => @@ -173,17 +161,6 @@ const handlers: { try { // Do not setup anything to do with Python in the global scope when tracing. The Jaeger tracing // needs to be called inside an IO context. - if (!IS_TRACING) { - if (IS_WORKERD) { - // If we're in workerd, we have to do setupPackages in the IoContext, so don't start it yet. - // TODO: fix this. - await getPyodide(); - } else { - // If we're not in workerd, setupPackages doesn't require IO so we can do it all here. - await getMainModule(); - } - } - if (IS_WORKERD || IS_TRACING) { handlers.fetch = makeHandler('on_fetch'); handlers.test = makeHandler('test'); diff --git a/src/pyodide/types/FS.d.ts b/src/pyodide/types/FS.d.ts index edbc9f4e48f..b6002b5f47b 100644 --- a/src/pyodide/types/FS.d.ts +++ b/src/pyodide/types/FS.d.ts @@ -12,7 +12,7 @@ interface TarFSInfo { name: string; parts: string[]; contentsOffset?: number; - reader?: Reader; + reader: Reader | null; } declare type MetadataDirInfo = Map; diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index 9bde1f3e0bc..e87ffcc09eb 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -35,10 +35,9 @@ void PyodideBundleManager::setPyodideBundleData( kj::mv(version), {.messageReader = kj::mv(messageReader), .bundle = bundle}); } -const kj::Maybe> PyodidePackageManager::getPyodidePackage( +const kj::Maybe&> PyodidePackageManager::getPyodidePackage( kj::StringPtr id) const { - return packages.lockShared()->find(id).map( - [](const kj::Array& t) { return t.asPtr(); }); + return packages.lockShared()->find(id); } void PyodidePackageManager::setPyodidePackageData( diff --git a/src/workerd/api/pyodide/pyodide.h b/src/workerd/api/pyodide/pyodide.h index 38812c4c0a4..57774143b08 100644 --- a/src/workerd/api/pyodide/pyodide.h +++ b/src/workerd/api/pyodide/pyodide.h @@ -38,7 +38,7 @@ class PyodideBundleManager { class PyodidePackageManager { public: void setPyodidePackageData(kj::String id, kj::Array data) const; - const kj::Maybe> getPyodidePackage(kj::StringPtr id) const; + const kj::Maybe&> getPyodidePackage(kj::StringPtr id) const; private: const kj::MutexGuarded>> packages; diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index 9002b60bc03..60d4cb048e8 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -558,6 +558,7 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam, makePyodideMetadataReader(conf, impl->pythonConfig), jsg::ModuleRegistry::Type::INTERNAL); // Inject packages tar file + // TODO(later): This shouldn't exist once featureFlags.getPythonExternalPackages() is true. modules->addBuiltinModule("pyodide-internal:packages_tar_reader", jsg::alloc(PYODIDE_PACKAGES_TAR.get()), workerd::jsg::ModuleRegistry::Type::INTERNAL);