Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wasm-split] Add a multi-split mode #6943

Merged
merged 6 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions src/tools/wasm-split/split-options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) {
case WasmSplitOptions::Mode::Split:
o << "split";
break;
case WasmSplitOptions::Mode::MultiSplit:
o << "multi-split";
break;
case WasmSplitOptions::Mode::Instrument:
o << "instrument";
break;
Expand Down Expand Up @@ -91,7 +94,14 @@ WasmSplitOptions::WasmSplitOptions()
"Split an input module into two output modules. The default mode.",
WasmSplitOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& arugment) { mode = Mode::Split; })
[&](Options* o, const std::string& argument) { mode = Mode::Split; })
.add(
"--multi-split",
"",
"Split an input module into an arbitrary number of output modules.",
WasmSplitOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { mode = Mode::MultiSplit; })
.add(
"--instrument",
"",
Expand Down Expand Up @@ -151,6 +161,25 @@ WasmSplitOptions::WasmSplitOptions()
[&](Options* o, const std::string& argument) {
splitFuncs = parseNameList(argument);
})
.add(
"--manifest",
"",
"File describing the functions to be split into each module. Each "
"section separated by a blank line begins with the base name of an "
"output module, which is followed by a list of functions to place in "
"that module, one per line.",
WasmSplitOption,
{Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { manifestFile = argument; })
.add("--out-prefix",
"",
"Prefix prepended to module names in the manifest file to create "
"output file names.",
WasmSplitOption,
{Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { outPrefix = argument; })
.add("--primary-output",
"-o1",
"Output file for the primary module.",
Expand Down Expand Up @@ -313,7 +342,7 @@ WasmSplitOptions::WasmSplitOptions()
"-g",
"Emit names section in wasm binary (or full debuginfo in wast)",
WasmSplitOption,
{Mode::Split, Mode::Instrument},
{Mode::Split, Mode::MultiSplit, Mode::Instrument},
Options::Arguments::Zero,
[&](Options* o, const std::string& arguments) {
passOptions.debugInfo = true;
Expand All @@ -322,7 +351,7 @@ WasmSplitOptions::WasmSplitOptions()
"-o",
"Output file.",
WasmSplitOption,
{Mode::Instrument, Mode::MergeProfiles},
{Mode::Instrument, Mode::MergeProfiles, Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { output = argument; })
.add("--unescape",
Expand Down Expand Up @@ -407,6 +436,7 @@ bool WasmSplitOptions::validate() {
}
switch (mode) {
case Mode::Split:
case Mode::MultiSplit:
case Mode::Instrument:
if (inputFiles.size() > 1) {
fail("Cannot have more than one input file.");
Expand Down
4 changes: 4 additions & 0 deletions src/tools/wasm-split/split-options.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const std::string DEFAULT_PROFILE_EXPORT("__write_profile");
struct WasmSplitOptions : ToolOptions {
enum class Mode : unsigned {
Split,
MultiSplit,
Instrument,
MergeProfiles,
PrintProfile,
Expand Down Expand Up @@ -68,6 +69,9 @@ struct WasmSplitOptions : ToolOptions {
std::string secondaryMemoryName;
std::string exportPrefix;

std::string manifestFile;
std::string outPrefix;

// A hack to ensure the split and instrumented modules have the same table
// size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO:
// Figure out a more elegant solution for that use case and remove this.
Expand Down
84 changes: 84 additions & 0 deletions src/tools/wasm-split/wasm-split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,87 @@ void splitModule(const WasmSplitOptions& options) {
writeModule(*secondary, options.secondaryOutput, options);
}

void multiSplitModule(const WasmSplitOptions& options) {
if (options.manifestFile.empty()) {
Fatal() << "--multi-split requires --manifest";
}
if (options.output.empty()) {
Fatal() << "--multi-split requires --output";
}

std::ifstream manifest(options.manifestFile);
if (!manifest.is_open()) {
Fatal() << "File not found: " << options.manifestFile;
}

Module wasm;
parseInput(wasm, options);

// Map module names to the functions that should be in the modules.
std::map<std::string, std::unordered_set<std::string>> moduleFuncs;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not hard to figure these out, but some comments would be nice I think.

// The module for which we are currently parsing a set of functions.
std::string currModule;
// The set of functions we are currently inserting into.
std::unordered_set<std::string>* currFuncs = nullptr;
// Map functions to their modules to ensure no function is assigned to
// multiple modules.
std::unordered_map<std::string, std::string> funcModules;

std::string line;
bool newSection = true;
while (std::getline(manifest, line)) {
if (line.empty()) {
newSection = true;
continue;
}
if (newSection) {
currModule = line;
currFuncs = &moduleFuncs[line];
newSection = false;
continue;
}
assert(currFuncs);
currFuncs->insert(line);
auto [it, inserted] = funcModules.insert({line, currModule});
if (!inserted && it->second != currModule) {
Fatal() << "Function " << line << "cannot be assigned to module "
<< currModule << "; it is already assigned to module "
<< it->second << '\n';
}
if (inserted && !options.quiet && !wasm.getFunctionOrNull(line)) {
std::cerr << "warning: Function " << line << " does not exist\n";
}
}

ModuleSplitting::Config config;
config.usePlaceholders = false;
config.importNamespace = "";
config.minimizeNewExportNames = true;
for (auto& func : wasm.functions) {
config.primaryFuncs.insert(func->name);
}
for (auto& [mod, funcs] : moduleFuncs) {
if (options.verbose) {
std::cerr << "Splitting module " << mod << '\n';
}
if (!options.quiet && funcs.empty()) {
std::cerr << "warning: Module " << mod << " will be empty\n";
}
for (auto& func : funcs) {
config.primaryFuncs.erase(Name(func));
}
auto splitResults = ModuleSplitting::splitFunctions(wasm, config);
// TODO: symbolMap, placeholderMap, emitModuleNames
// TODO: Support --emit-text and use .wast in that case.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe error on these for now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our special option validation that checks that each flag is allowed in the selected mode already takes care of that.

auto moduleName = options.outPrefix + mod + ".wasm";
PassRunner runner(&*splitResults.secondary);
runner.add("remove-unused-module-elements");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed here but not in single-split?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably add it to the normal split mode as well. In experiments with hundreds of relatively small modules, this was obviously very necessary. I guess it's harder to tell by visual inspection that it is necessary for larger secondary modules.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Maybe add a TODO for the normal mode?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just uploaded #6945 to do this for all secondary modules.

runner.run();
writeModule(*splitResults.secondary, moduleName, options);
}
writeModule(wasm, options.output, options);
}

void mergeProfiles(const WasmSplitOptions& options) {
// Read the initial profile. We will merge other profiles into this one.
ProfileData data = readProfile(options.inputFiles[0]);
Expand Down Expand Up @@ -503,6 +584,9 @@ int main(int argc, const char* argv[]) {
case WasmSplitOptions::Mode::Split:
splitModule(options);
break;
case WasmSplitOptions::Mode::MultiSplit:
multiSplitModule(options);
break;
case WasmSplitOptions::Mode::Instrument:
instrumentModule(options);
break;
Expand Down
23 changes: 20 additions & 3 deletions test/lit/help/wasm-split.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
;; CHECK-NEXT: --split Split an input module into two output
;; CHECK-NEXT: modules. The default mode.
;; CHECK-NEXT:
;; CHECK-NEXT: --multi-split Split an input module into an arbitrary
;; CHECK-NEXT: number of output modules.
;; CHECK-NEXT:
;; CHECK-NEXT: --instrument Instrument an input module to allow it to
;; CHECK-NEXT: generate a profile that can be used to
;; CHECK-NEXT: guide splitting.
Expand Down Expand Up @@ -43,6 +46,18 @@
;; CHECK-NEXT: can also pass a file with one function
;; CHECK-NEXT: per line by passing @filename.
;; CHECK-NEXT:
;; CHECK-NEXT: --manifest [multi-split] File describing the
;; CHECK-NEXT: functions to be split into each module.
;; CHECK-NEXT: Each section separated by a blank line
;; CHECK-NEXT: begins with the base name of an output
;; CHECK-NEXT: module, which is followed by a list of
;; CHECK-NEXT: functions to place in that module, one
;; CHECK-NEXT: per line.
;; CHECK-NEXT:
;; CHECK-NEXT: --out-prefix [multi-split] Prefix prepended to module
;; CHECK-NEXT: names in the manifest file to create
;; CHECK-NEXT: output file names.
;; CHECK-NEXT:
;; CHECK-NEXT: --primary-output,-o1 [split] Output file for the primary
;; CHECK-NEXT: module.
;; CHECK-NEXT:
Expand Down Expand Up @@ -125,10 +140,12 @@
;; CHECK-NEXT: --emit-text,-S [split, instrument] Emit text instead of
;; CHECK-NEXT: binary for the output file or files.
;; CHECK-NEXT:
;; CHECK-NEXT: --debuginfo,-g [split, instrument] Emit names section in
;; CHECK-NEXT: wasm binary (or full debuginfo in wast)
;; CHECK-NEXT: --debuginfo,-g [split, multi-split, instrument] Emit
;; CHECK-NEXT: names section in wasm binary (or full
;; CHECK-NEXT: debuginfo in wast)
;; CHECK-NEXT:
;; CHECK-NEXT: --output,-o [instrument, merge-profiles] Output file.
;; CHECK-NEXT: --output,-o [instrument, merge-profiles, multi-split]
;; CHECK-NEXT: Output file.
;; CHECK-NEXT:
;; CHECK-NEXT: --unescape,-u Un-escape function names (in
;; CHECK-NEXT: print-profile output)
Expand Down
Loading
Loading