Skip to content

Commit

Permalink
Merge pull request #3140 from cloudflare/dominik/EW-8835
Browse files Browse the repository at this point in the history
Implements support for external package loading in validator.
  • Loading branch information
dom96 authored Nov 27, 2024
2 parents 735aeb2 + 5fada36 commit 564c8a2
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 40 deletions.
1 change: 1 addition & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ common --enable_platform_specific_config
build --verbose_failures
build --build_tag_filters=-off-by-default
test --test_tag_filters=-off-by-default
test:asan --test_tag_filters=-off-by-default,-no-asan
# exclude enormous tests by default
build --test_size_filters=-enormous

Expand Down
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/ai/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,8 @@ py_wd_test(
"*.js",
"*.py",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/d1/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ py_wd_test(
"*.py",
"*.js",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/vectorize/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ py_wd_test(
"*.py",
"*.js",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
16 changes: 14 additions & 2 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
import {
SITE_PACKAGES,
TRANSITIVE_REQUIREMENTS,
adjustSysPath,
mountSitePackages,
mountWorkerFiles,
Expand Down Expand Up @@ -50,6 +51,7 @@ import {
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:generated/emscriptenSetup';
import { loadPackages } from 'pyodide-internal:loadPackage';

/**
* After running `instantiateEmscriptenModule` but before calling into any C
Expand All @@ -59,9 +61,8 @@ import {
*/
async function prepareWasmLinearMemory(Module: Module): Promise<void> {
// Note: if we are restoring from a snapshot, runtime is not initialized yet.
mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT;

enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module));
enterJaegerSpan('remove_run_dependency', () =>
Module.removeRunDependency('dynlibs')
Expand Down Expand Up @@ -100,9 +101,19 @@ export async function loadPyodide(
}
setUnsafeEval(UnsafeEval);
setGetRandomValues(getRandomValues);

mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
await enterJaegerSpan('load_packages', () =>
// NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in
// preloadDynamicLibs.
loadPackages(Module, TRANSITIVE_REQUIREMENTS)
);

await enterJaegerSpan('prepare_wasm_linear_memory', () =>
prepareWasmLinearMemory(Module)
);

maybeSetupSnapshotUpload(Module);
// Mount worker files after doing snapshot upload so we ensure that data from the files is never
// present in snapshot memory.
Expand All @@ -111,6 +122,7 @@ export async function loadPyodide(
// Finish setting up Pyodide's ffi so we can use the nice Python interface
await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
const pyodide = Module.API.public_api;

finishSnapshotSetup(pyodide);
return pyodide;
}
7 changes: 6 additions & 1 deletion src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
} from 'pyodide-internal:metadata';
import { simpleRunPython } from 'pyodide-internal:util';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';

const canonicalizeNameRegex = /[-_.]+/g;

Expand Down Expand Up @@ -44,6 +45,7 @@ class SitePackagesDir {
path: '',
name: '',
parts: [],
reader: null,
};
this.soFiles = [];
this.loadedRequirements = new Set();
Expand Down Expand Up @@ -125,9 +127,11 @@ class SitePackagesDir {
*
* This also returns the list of soFiles in the resulting site-packages
* directory so we can preload them.
*
* TODO(later): This needs to be removed when external package loading is enabled.
*/
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
const [bigTarInfo, bigTarSoFiles] = parseTarInfo();
const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader);

let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]);

Expand Down Expand Up @@ -171,6 +175,7 @@ function disabledLoadPackage(): never {
function getTransitiveRequirements(): Set<string> {
const requirements = REQUIREMENTS.map(canonicalizePackageName);
// resolve transitive dependencies of requirements and if IN_WORKERD install them from the cdn.
// TODO(later): use current package's LOCKFILE instead of the global.
const packageDatas = recursiveDependencies(LOCKFILE, requirements);
return new Set(packageDatas.map(({ name }) => canonicalizePackageName(name)));
}
Expand Down
7 changes: 5 additions & 2 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
SITE_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
import {
SHOULD_SNAPSHOT_TO_DISK,
IS_CREATING_BASELINE_SNAPSHOT,
Expand Down Expand Up @@ -136,7 +136,10 @@ export function preloadDynamicLibs(Module: Module): void {
throw Error('contentsOffset not defined for ' + soFile);
}
const wasmModuleData = new Uint8Array(size);
TarReader.read(contentsOffset, wasmModuleData);
(node.reader ?? EmbeddedPackagesTarReader).read(
contentsOffset,
wasmModuleData
);
const path = sitePackages + '/' + soFile.join('/');
PRELOADED_SO_FILES.push(path);
loadDynlib(Module, path, wasmModuleData);
Expand Down
4 changes: 1 addition & 3 deletions src/pyodide/internal/tar.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';

// This is based on the info about the tar file format on wikipedia
// And some trial and error with real tar files.
// https://en.wikipedia.org/wiki/Tar_(computing)#File_format
Expand Down Expand Up @@ -44,7 +42,7 @@ function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo {
};
}

export function parseTarInfo(reader = TarReader): [TarFSInfo, string[]] {
export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] {
const directories: TarFSInfo[] = [];
const soFiles = [];
const root: TarFSInfo = {
Expand Down
31 changes: 4 additions & 27 deletions src/pyodide/python-entrypoint-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
import { reportError } from 'pyodide-internal:util';
import { default as Limiter } from 'pyodide-internal:limiter';
import { entropyBeforeRequest } from 'pyodide-internal:topLevelEntropy/lib';
import { loadPackages } from 'pyodide-internal:loadPackage';

function pyimportMainModule(pyodide: Pyodide): PyModule {
if (!MAIN_MODULE_NAME.endsWith('.py')) {
Expand Down Expand Up @@ -74,21 +73,10 @@ async function applyPatch(pyodide: Pyodide, patchName: string): Promise<void> {
pyodide.pyimport(patchName + '_patch');
}

/**
* Set up Python packages:
* - patch loadPackage to ignore integrity
* - get requirements
* - Use tar file + requirements to mount site packages directory
* - if in workerd use loadPackage to load packages
* - install patches to make various requests packages work
*
* TODO: move this into setupPackages.js. Can't now because the patch imports
* fail from there for some reason.
*/
export async function setupPackages(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_packages', async () => {
async function setupPatches(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_patches', async () => {
patchLoadPackage(pyodide);
await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS);

// install any extra packages into the site-packages directory, so calculate where that is.
const pymajor = pyodide._module._py_version_major();
const pyminor = pyodide._module._py_version_minor();
Expand Down Expand Up @@ -119,7 +107,7 @@ function getMainModule(): Promise<PyModule> {
}
mainModulePromise = (async function () {
const pyodide = await getPyodide();
await setupPackages(pyodide);
await setupPatches(pyodide);
Limiter.beginStartup();
try {
return enterJaegerSpan('pyimport_main_module', () =>
Expand Down Expand Up @@ -173,17 +161,6 @@ const handlers: {
try {
// Do not setup anything to do with Python in the global scope when tracing. The Jaeger tracing
// needs to be called inside an IO context.
if (!IS_TRACING) {
if (IS_WORKERD) {
// If we're in workerd, we have to do setupPackages in the IoContext, so don't start it yet.
// TODO: fix this.
await getPyodide();
} else {
// If we're not in workerd, setupPackages doesn't require IO so we can do it all here.
await getMainModule();
}
}

if (IS_WORKERD || IS_TRACING) {
handlers.fetch = makeHandler('on_fetch');
handlers.test = makeHandler('test');
Expand Down
2 changes: 1 addition & 1 deletion src/pyodide/types/FS.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interface TarFSInfo {
name: string;
parts: string[];
contentsOffset?: number;
reader?: Reader;
reader: Reader | null;
}

declare type MetadataDirInfo = Map<string, MetadataDirInfo>;
Expand Down
5 changes: 2 additions & 3 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@ void PyodideBundleManager::setPyodideBundleData(
kj::mv(version), {.messageReader = kj::mv(messageReader), .bundle = bundle});
}

const kj::Maybe<kj::ArrayPtr<const unsigned char>> PyodidePackageManager::getPyodidePackage(
const kj::Maybe<const kj::Array<unsigned char>&> PyodidePackageManager::getPyodidePackage(
kj::StringPtr id) const {
return packages.lockShared()->find(id).map(
[](const kj::Array<unsigned char>& t) { return t.asPtr(); });
return packages.lockShared()->find(id);
}

void PyodidePackageManager::setPyodidePackageData(
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PyodideBundleManager {
class PyodidePackageManager {
public:
void setPyodidePackageData(kj::String id, kj::Array<unsigned char> data) const;
const kj::Maybe<kj::ArrayPtr<const unsigned char>> getPyodidePackage(kj::StringPtr id) const;
const kj::Maybe<const kj::Array<unsigned char>&> getPyodidePackage(kj::StringPtr id) const;

private:
const kj::MutexGuarded<kj::HashMap<kj::String, kj::Array<unsigned char>>> packages;
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam,
makePyodideMetadataReader(conf, impl->pythonConfig), jsg::ModuleRegistry::Type::INTERNAL);

// Inject packages tar file
// TODO(later): This shouldn't exist once featureFlags.getPythonExternalPackages() is true.
modules->addBuiltinModule("pyodide-internal:packages_tar_reader",
jsg::alloc<ReadOnlyBuffer>(PYODIDE_PACKAGES_TAR.get()),
workerd::jsg::ModuleRegistry::Type::INTERNAL);
Expand Down

0 comments on commit 564c8a2

Please sign in to comment.