Skip to content

Commit

Permalink
prelinking (#107)
Browse files Browse the repository at this point in the history
* adds prelink.rs

* qualify freestanding function defined in another module

---------

Co-authored-by: Phil Cummins <[email protected]>
  • Loading branch information
philrhc and Phil Cummins authored Sep 30, 2024
1 parent 82afee4 commit 5aa9438
Show file tree
Hide file tree
Showing 2 changed files with 273 additions and 264 deletions.
295 changes: 31 additions & 264 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use {
indexmap::{IndexMap, IndexSet},
serde::Deserialize,
std::{
collections::{HashMap, HashSet},
collections::HashMap,
env, fs,
io::Cursor,
iter,
Expand All @@ -19,7 +19,6 @@ use {
str,
},
summary::{Escape, Locations, Summary},
tar::Archive,
wasm_convert::IntoValType,
wasm_encoder::{
CodeSection, ExportKind, ExportSection, Function, FunctionSection, Instruction, Module,
Expand All @@ -35,22 +34,20 @@ use {
DirPerms, FilePerms, WasiCtx, WasiCtxBuilder, WasiView,
},
wit_parser::{Resolve, TypeDefKind, UnresolvedPackageGroup, WorldId, WorldItem, WorldKey},
zstd::Decoder,
};

mod abi;
mod bindgen;
mod bindings;
pub mod command;
mod prelink;
#[cfg(feature = "pyo3")]
mod python;
mod summary;
#[cfg(test)]
mod test;
mod util;

static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";

wasmtime::component::bindgen!({
path: "wit",
world: "init",
Expand All @@ -62,6 +59,12 @@ pub struct Ctx {
table: ResourceTable,
}

/// A single shared library that is handed to the component linker.
///
/// Instances are built both for the libraries embedded in this crate and for
/// native extensions discovered on the Python path.
pub struct Library {
/// Name the library is registered under, e.g. `libc.so` for an embedded
/// library or a `/{index}/{relative path}` string for a discovered extension.
name: String,
/// Raw bytes of the library's module.
module: Vec<u8>,
/// Presumably whether the library may be opened dynamically (`dlopen`) at
/// runtime; the embedded libraries set this to `false` and discovered native
/// extensions set it to `true` — TODO confirm against the linker call site.
dl_openable: bool,
}

impl WasiView for Ctx {
fn ctx(&mut self) -> &mut WasiCtx {
&mut self.wasi
Expand Down Expand Up @@ -104,7 +107,7 @@ impl TryFrom<(&Path, RawComponentizePyConfig)> for ComponentizePyConfig {
}

#[derive(Debug)]
struct ConfigContext<T> {
pub struct ConfigContext<T> {
module: String,
root: PathBuf,
path: PathBuf,
Expand Down Expand Up @@ -207,85 +210,14 @@ pub async fn componentize(
.filter_map(|&s| Path::new(s).exists().then_some(s))
.collect::<Vec<_>>();

// Untar the embedded copy of the Python standard library into a temporary directory
let stdlib = tempfile::tempdir()?;

Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/python-lib.tar.zst"
))))?)
.unpack(stdlib.path())?;

// Untar the embedded copy of helper utilities into a temporary directory
let bundled = tempfile::tempdir()?;

Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/bundled.tar.zst"
))))?)
.unpack(bundled.path())?;

// Search `python_path` for native extension libraries and/or componentize-py.toml files. Packages containing
// the latter may contain their own WIT files defining their own worlds (in addition to what the caller
// specified as parameters), which we'll try to match up with `module_worlds` in the next step.
let mut raw_configs = Vec::new();
let mut library_path = Vec::with_capacity(python_path.len());
for path in python_path {
let mut libraries = Vec::new();
search_directory(
Path::new(path),
Path::new(path),
&mut libraries,
&mut raw_configs,
&mut HashSet::new(),
)?;
library_path.push((*path, libraries));
}

// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
// which is required to be topologically sorted with respect to package dependencies.
//
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
// the `world` parameter as a default.
let configs = {
let mut configs = raw_configs
.into_iter()
.map(|raw_config| {
let config =
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;

Ok((
raw_config.module.clone(),
ConfigContext {
module: raw_config.module,
root: raw_config.root,
path: raw_config.path,
config,
},
))
})
.collect::<Result<HashMap<_, _>>>()?;

let mut ordered = IndexMap::new();
for (module, world) in module_worlds {
if let Some(config) = configs.remove(*module) {
ordered.insert((*module).to_owned(), (config, Some(*world)));
} else {
bail!("no `componentize-py.toml` file found for module `{module}`");
}
}
let embedded_python_standard_lib = prelink::embedded_python_standard_library()?;
let embedded_helper_utils = prelink::embedded_helper_utils()?;

for (module, config) in configs {
ordered.insert(module, (config, world));
}

ordered
};
let (configs, mut libraries) =
prelink::search_for_libraries_and_configs(python_path, module_worlds, world)?;

// Next, iterate over all the WIT directories, merging them into a single `Resolve`, and matching Python
// packages to `WorldId`s.

let (mut resolve, mut main_world) = if let Some(path) = wit_path {
let (resolve, world) = parse_wit(path, world)?;
(Some(resolve), Some(world))
Expand Down Expand Up @@ -341,108 +273,11 @@ pub async fn componentize(

let summary = Summary::try_new(&resolve, &worlds)?;

struct Library {
name: String,
module: Vec<u8>,
dl_openable: bool,
}

let mut libraries = vec![
Library {
name: "libcomponentize_py_runtime.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libcomponentize_py_runtime.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libpython3.12.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libpython3.12.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libc.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libc.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libwasi-emulated-mman.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libwasi-emulated-mman.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libwasi-emulated-process-clocks.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libwasi-emulated-process-clocks.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libwasi-emulated-getpid.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libwasi-emulated-getpid.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libwasi-emulated-signal.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libwasi-emulated-signal.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libc++.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libc++.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libc++abi.so".into(),
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
env!("OUT_DIR"),
"/libc++abi.so.zst"
))))?,
dl_openable: false,
},
Library {
name: "libcomponentize_py_bindings.so".into(),
module: bindings::make_bindings(&resolve, &worlds, &summary)?,
dl_openable: false,
},
];

for (index, (path, libs)) in library_path.iter().enumerate() {
for library in libs {
let path = library
.strip_prefix(path)
.unwrap()
.to_str()
.context("non-UTF-8 path")?
.replace('\\', "/");

libraries.push(Library {
name: format!("/{index}/{path}"),
module: fs::read(library)?,
dl_openable: true,
});
}
}
libraries.push(Library {
name: "libcomponentize_py_bindings.so".into(),
module: bindings::make_bindings(&resolve, &worlds, &summary)?,
dl_openable: false,
});

// Link all the libraries (including any native extensions) into a single component.
let mut linker = wit_component::Linker::default().validate(true);
Expand Down Expand Up @@ -534,8 +369,18 @@ pub async fn componentize(
.env("PYTHONUNBUFFERED", "1")
.env("COMPONENTIZE_PY_APP_NAME", app_name)
.env("PYTHONHOME", "/python")
.preopened_dir(stdlib.path(), "python", DirPerms::all(), FilePerms::all())?
.preopened_dir(bundled.path(), "bundled", DirPerms::all(), FilePerms::all())?;
.preopened_dir(
embedded_python_standard_lib.path(),
"python",
DirPerms::all(),
FilePerms::all(),
)?
.preopened_dir(
embedded_helper_utils.path(),
"bundled",
DirPerms::all(),
FilePerms::all(),
)?;

// Generate guest mounts for each host directory in `python_path`.
for (index, path) in python_path.iter().enumerate() {
Expand Down Expand Up @@ -628,7 +473,7 @@ pub async fn componentize(

Ok(())
}
replace(bundled.path(), "proxy", &module)?;
replace(embedded_helper_utils.path(), "proxy", &module)?;
};

for (mounts, world_dir) in world_dir_mounts.iter() {
Expand Down Expand Up @@ -828,84 +673,6 @@ fn add_wasi_and_stubs(
Ok(())
}

/// Recursively walks `path`, collecting native extension libraries and
/// `componentize-py.toml` configuration files.
///
/// * `root` - the search root this walk started from; used (after
///   canonicalization) to derive the module name of any config file found.
/// * `path` - the directory or file currently being examined.
/// * `libraries` - receives the path of every file whose name ends with
///   `NATIVE_EXTENSION_SUFFIX`.
/// * `configs` - receives one parsed entry per `componentize-py.toml` that is
///   not already represented; existing entries may be updated in place.
/// * `modules_seen` - module names already encountered during this walk.
///
/// # Errors
///
/// Fails if a directory cannot be read, a path cannot be canonicalized, no
/// module name can be derived for a config file, a module name is found in
/// `modules_seen`, or a config file cannot be read or parsed as TOML.
fn search_directory(
root: &Path,
path: &Path,
libraries: &mut Vec<PathBuf>,
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
modules_seen: &mut HashSet<String>,
) -> Result<()> {
if path.is_dir() {
// Directories are descended into; any error from a child aborts the whole walk.
for entry in fs::read_dir(path)? {
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
}
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
// Files whose name is not valid UTF-8 are silently skipped by the `to_str` above.
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
libraries.push(path.to_owned());
} else if name == "componentize-py.toml" {
// Canonicalize both paths so the equality and ordering comparisons below are
// made on resolved paths rather than on however the caller spelled them.
let root = root.canonicalize()?;
let path = path.canonicalize()?;

let module = module_name(&root, &path)
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;

// `push` stays true only if no existing entry claims this file or this module.
let mut push = true;
for existing in &mut *configs {
if path == existing.path.join("componentize-py.toml") {
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
// subdirectory to be the true owner of the file. This is important later, when we derive a
// package name by stripping the root directory from the file path.
if root > existing.root {
module.clone_into(&mut existing.module);
root.clone_into(&mut existing.root);
path.parent().unwrap().clone_into(&mut existing.path);
}
push = false;
break;
} else {
// If we find a componentize-py.toml file under a Python module which will not be used because
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
// ignore the latest one.
//
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
// second module.

// NOTE(review): this check and the insert below run once per `existing` entry, not once
// per file. When `configs` already holds two or more entries that do not match this
// file, the second pass appears to find the name inserted by the first pass and bail.
// Confirm whether that is intended.
if modules_seen.contains(&module) {
bail!("multiple `componentize-py.toml` files found in module `{module}`");
}

modules_seen.insert(module.clone());

// Same module already recorded from another location: keep the earlier entry.
if module == existing.module {
push = false;
break;
}
}
}

if push {
configs.push(ConfigContext {
module,
root: root.to_owned(),
// Store the directory containing the config file, not the file itself.
path: path.parent().unwrap().to_owned(),
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
});
}
}
}

Ok(())
}

/// Derives a top-level module name from the location of `path` beneath `root`.
///
/// Returns the first component of `path` relative to `root`, provided that the
/// relative path has at least two components (i.e. the file lives inside a
/// directory under `root`, not directly in `root`).
///
/// Returns `None` when `path` is not under `root`, when the relative path has
/// fewer than two components, or when the first component is not valid UTF-8.
fn module_name(root: &Path, path: &Path) -> Option<String> {
    let relative = path.strip_prefix(root).ok()?;
    let mut components = relative.iter();

    let top_level = components.next()?;
    // Require something after the first component; a file sitting directly in
    // `root` does not belong to any module.
    components.next()?;

    top_level.to_str().map(str::to_owned)
}

fn add_wasi_imports<'a>(
module: &'a [u8],
imports: &mut HashMap<&'a str, HashMap<&'a str, FuncType>>,
Expand Down
Loading

0 comments on commit 5aa9438

Please sign in to comment.