diff --git a/src/tools/wasm-split/split-options.cpp b/src/tools/wasm-split/split-options.cpp index 825efddd973..e77957f1fb4 100644 --- a/src/tools/wasm-split/split-options.cpp +++ b/src/tools/wasm-split/split-options.cpp @@ -61,6 +61,9 @@ std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { case WasmSplitOptions::Mode::Split: o << "split"; break; + case WasmSplitOptions::Mode::MultiSplit: + o << "multi-split"; + break; case WasmSplitOptions::Mode::Instrument: o << "instrument"; break; @@ -91,7 +94,14 @@ WasmSplitOptions::WasmSplitOptions() "Split an input module into two output modules. The default mode.", WasmSplitOption, Options::Arguments::Zero, - [&](Options* o, const std::string& arugment) { mode = Mode::Split; }) + [&](Options* o, const std::string& argument) { mode = Mode::Split; }) + .add( + "--multi-split", + "", + "Split an input module into an arbitrary number of output modules.", + WasmSplitOption, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { mode = Mode::MultiSplit; }) .add( "--instrument", "", @@ -151,6 +161,25 @@ WasmSplitOptions::WasmSplitOptions() [&](Options* o, const std::string& argument) { splitFuncs = parseNameList(argument); }) + .add( + "--manifest", + "", + "File describing the functions to be split into each module. 
Each " + "section separated by a blank line begins with the base name of an " + "output module, which is followed by a list of functions to place in " + "that module, one per line.", + WasmSplitOption, + {Mode::MultiSplit}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { manifestFile = argument; }) + .add("--out-prefix", + "", + "Prefix prepended to module names in the manifest file to create " + "output file names.", + WasmSplitOption, + {Mode::MultiSplit}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { outPrefix = argument; }) .add("--primary-output", "-o1", "Output file for the primary module.", @@ -313,7 +342,7 @@ WasmSplitOptions::WasmSplitOptions() "-g", "Emit names section in wasm binary (or full debuginfo in wast)", WasmSplitOption, - {Mode::Split, Mode::Instrument}, + {Mode::Split, Mode::MultiSplit, Mode::Instrument}, Options::Arguments::Zero, [&](Options* o, const std::string& arguments) { passOptions.debugInfo = true; @@ -322,7 +351,7 @@ WasmSplitOptions::WasmSplitOptions() "-o", "Output file.", WasmSplitOption, - {Mode::Instrument, Mode::MergeProfiles}, + {Mode::Instrument, Mode::MergeProfiles, Mode::MultiSplit}, Options::Arguments::One, [&](Options* o, const std::string& argument) { output = argument; }) .add("--unescape", @@ -407,6 +436,7 @@ bool WasmSplitOptions::validate() { } switch (mode) { case Mode::Split: + case Mode::MultiSplit: case Mode::Instrument: if (inputFiles.size() > 1) { fail("Cannot have more than one input file."); diff --git a/src/tools/wasm-split/split-options.h b/src/tools/wasm-split/split-options.h index b8129f29bf0..105c90c80ee 100644 --- a/src/tools/wasm-split/split-options.h +++ b/src/tools/wasm-split/split-options.h @@ -26,6 +26,7 @@ const std::string DEFAULT_PROFILE_EXPORT("__write_profile"); struct WasmSplitOptions : ToolOptions { enum class Mode : unsigned { Split, + MultiSplit, Instrument, MergeProfiles, PrintProfile, @@ -68,6 +69,9 @@ struct WasmSplitOptions : 
ToolOptions { std::string secondaryMemoryName; std::string exportPrefix; + std::string manifestFile; + std::string outPrefix; + // A hack to ensure the split and instrumented modules have the same table // size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO: // Figure out a more elegant solution for that use case and remove this. diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp index cb148090d64..ea1734b6b09 100644 --- a/src/tools/wasm-split/wasm-split.cpp +++ b/src/tools/wasm-split/wasm-split.cpp @@ -362,6 +362,87 @@ void splitModule(const WasmSplitOptions& options) { writeModule(*secondary, options.secondaryOutput, options); } +void multiSplitModule(const WasmSplitOptions& options) { + if (options.manifestFile.empty()) { + Fatal() << "--multi-split requires --manifest"; + } + if (options.output.empty()) { + Fatal() << "--multi-split requires --output"; + } + + std::ifstream manifest(options.manifestFile); + if (!manifest.is_open()) { + Fatal() << "File not found: " << options.manifestFile; + } + + Module wasm; + parseInput(wasm, options); + + // Map module names to the functions that should be in the modules. + std::map<std::string, std::unordered_set<std::string>> moduleFuncs; + // The module for which we are currently parsing a set of functions. + std::string currModule; + // The set of functions we are currently inserting into. + std::unordered_set<std::string>* currFuncs = nullptr; + // Map functions to their modules to ensure no function is assigned to + // multiple modules. 
+ std::unordered_map<std::string, std::string> funcModules; + + std::string line; + bool newSection = true; + while (std::getline(manifest, line)) { + if (line.empty()) { + newSection = true; + continue; + } + if (newSection) { + currModule = line; + currFuncs = &moduleFuncs[line]; + newSection = false; + continue; + } + assert(currFuncs); + currFuncs->insert(line); + auto [it, inserted] = funcModules.insert({line, currModule}); + if (!inserted && it->second != currModule) { + Fatal() << "Function " << line << " cannot be assigned to module " + << currModule << "; it is already assigned to module " + << it->second << '\n'; + } + if (inserted && !options.quiet && !wasm.getFunctionOrNull(line)) { + std::cerr << "warning: Function " << line << " does not exist\n"; + } + } + + ModuleSplitting::Config config; + config.usePlaceholders = false; + config.importNamespace = ""; + config.minimizeNewExportNames = true; + for (auto& func : wasm.functions) { + config.primaryFuncs.insert(func->name); + } + for (auto& [mod, funcs] : moduleFuncs) { + if (options.verbose) { + std::cerr << "Splitting module " << mod << '\n'; + } + if (!options.quiet && funcs.empty()) { + std::cerr << "warning: Module " << mod << " will be empty\n"; + } + for (auto& func : funcs) { + config.primaryFuncs.erase(Name(func)); + } + auto splitResults = ModuleSplitting::splitFunctions(wasm, config); + // TODO: symbolMap, placeholderMap, emitModuleNames + // TODO: Support --emit-text and use .wast in that case. + auto moduleName = options.outPrefix + mod + ".wasm"; + PassRunner runner(&*splitResults.secondary); + runner.add("remove-unused-module-elements"); + runner.run(); + writeModule(*splitResults.secondary, moduleName, options); + } + writeModule(wasm, options.output, options); +} + void mergeProfiles(const WasmSplitOptions& options) { // Read the initial profile. We will merge other profiles into this one. 
ProfileData data = readProfile(options.inputFiles[0]); @@ -503,6 +584,9 @@ int main(int argc, const char* argv[]) { case WasmSplitOptions::Mode::Split: splitModule(options); break; + case WasmSplitOptions::Mode::MultiSplit: + multiSplitModule(options); + break; case WasmSplitOptions::Mode::Instrument: instrumentModule(options); break; diff --git a/test/lit/help/wasm-split.test b/test/lit/help/wasm-split.test index 4fa534e43ca..e5e73656205 100644 --- a/test/lit/help/wasm-split.test +++ b/test/lit/help/wasm-split.test @@ -15,6 +15,9 @@ ;; CHECK-NEXT: --split Split an input module into two output ;; CHECK-NEXT: modules. The default mode. ;; CHECK-NEXT: +;; CHECK-NEXT: --multi-split Split an input module into an arbitrary +;; CHECK-NEXT: number of output modules. +;; CHECK-NEXT: ;; CHECK-NEXT: --instrument Instrument an input module to allow it to ;; CHECK-NEXT: generate a profile that can be used to ;; CHECK-NEXT: guide splitting. @@ -43,6 +46,18 @@ ;; CHECK-NEXT: can also pass a file with one function ;; CHECK-NEXT: per line by passing @filename. ;; CHECK-NEXT: +;; CHECK-NEXT: --manifest [multi-split] File describing the +;; CHECK-NEXT: functions to be split into each module. +;; CHECK-NEXT: Each section separated by a blank line +;; CHECK-NEXT: begins with the base name of an output +;; CHECK-NEXT: module, which is followed by a list of +;; CHECK-NEXT: functions to place in that module, one +;; CHECK-NEXT: per line. +;; CHECK-NEXT: +;; CHECK-NEXT: --out-prefix [multi-split] Prefix prepended to module +;; CHECK-NEXT: names in the manifest file to create +;; CHECK-NEXT: output file names. +;; CHECK-NEXT: ;; CHECK-NEXT: --primary-output,-o1 [split] Output file for the primary ;; CHECK-NEXT: module. ;; CHECK-NEXT: @@ -125,10 +140,12 @@ ;; CHECK-NEXT: --emit-text,-S [split, instrument] Emit text instead of ;; CHECK-NEXT: binary for the output file or files. 
;; CHECK-NEXT: -;; CHECK-NEXT: --debuginfo,-g [split, instrument] Emit names section in -;; CHECK-NEXT: wasm binary (or full debuginfo in wast) +;; CHECK-NEXT: --debuginfo,-g [split, multi-split, instrument] Emit +;; CHECK-NEXT: names section in wasm binary (or full +;; CHECK-NEXT: debuginfo in wast) ;; CHECK-NEXT: -;; CHECK-NEXT: --output,-o [instrument, merge-profiles] Output file. +;; CHECK-NEXT: --output,-o [instrument, merge-profiles, multi-split] +;; CHECK-NEXT: Output file. ;; CHECK-NEXT: ;; CHECK-NEXT: --unescape,-u Un-escape function names (in ;; CHECK-NEXT: print-profile output) diff --git a/test/lit/wasm-split/multi-split.wast b/test/lit/wasm-split/multi-split.wast new file mode 100644 index 00000000000..9206e60eee5 --- /dev/null +++ b/test/lit/wasm-split/multi-split.wast @@ -0,0 +1,220 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-split -all -g --multi-split %s --manifest %s.manifest --out-prefix=%t -o %t.wasm +;; RUN: wasm-dis %t.wasm | filecheck %s --check-prefix=PRIMARY +;; RUN: wasm-dis %t1.wasm | filecheck %s --check-prefix=CHECK-A +;; RUN: wasm-dis %t2.wasm | filecheck %s --check-prefix=CHECK-B +;; RUN: wasm-dis %t3.wasm | filecheck %s --check-prefix=CHECK-C + +(module + (type $ret-i32 (func (result i32))) + ;; PRIMARY: (type $ret-i64 (func (result i64))) + (type $ret-i64 (func (result i64))) + ;; PRIMARY: (type $ret-f32 (func (result f32))) + (type $ret-f32 (func (result f32))) + ;; CHECK-A: (type $0 (func (result i64))) + + ;; CHECK-A: (type $1 (func (result f32))) + + ;; CHECK-A: (type $2 (func (result i32))) + + ;; CHECK-A: (import "" "table" (table $timport$0 1 funcref)) + + ;; CHECK-A: (import "" "a" (func $B (result i64))) + + ;; CHECK-A: (import "" "b" (func $C (result f32))) + + ;; CHECK-A: (elem $0 (i32.const 0) $A) + + ;; CHECK-A: (func $A (result i32) + ;; CHECK-A-NEXT: (drop + ;; CHECK-A-NEXT: (call_ref $2 + ;; CHECK-A-NEXT: (ref.func $A) + ;; CHECK-A-NEXT: ) 
+ ;; CHECK-A-NEXT: ) + ;; CHECK-A-NEXT: (drop + ;; CHECK-A-NEXT: (call_ref $0 + ;; CHECK-A-NEXT: (ref.func $B) + ;; CHECK-A-NEXT: ) + ;; CHECK-A-NEXT: ) + ;; CHECK-A-NEXT: (drop + ;; CHECK-A-NEXT: (call_ref $1 + ;; CHECK-A-NEXT: (ref.func $C) + ;; CHECK-A-NEXT: ) + ;; CHECK-A-NEXT: ) + ;; CHECK-A-NEXT: (i32.const 0) + ;; CHECK-A-NEXT: ) + (func $A (type $ret-i32) (result i32) + (drop + (call_ref $ret-i32 + (ref.func $A) + ) + ) + (drop + (call_ref $ret-i64 + (ref.func $B) + ) + ) + (drop + (call_ref $ret-f32 + (ref.func $C) + ) + ) + (i32.const 0) + ) + ;; CHECK-B: (type $0 (func (result i32))) + + ;; CHECK-B: (type $1 (func (result f32))) + + ;; CHECK-B: (type $2 (func (result i64))) + + ;; CHECK-B: (import "" "table_3" (table $timport$0 2 funcref)) + + ;; CHECK-B: (import "" "table" (table $timport$1 1 funcref)) + + ;; CHECK-B: (import "" "b" (func $C (result f32))) + + ;; CHECK-B: (elem $0 (table $timport$0) (i32.const 0) func $B $1) + + ;; CHECK-B: (func $B (result i64) + ;; CHECK-B-NEXT: (drop + ;; CHECK-B-NEXT: (call_ref $0 + ;; CHECK-B-NEXT: (ref.func $1) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: (drop + ;; CHECK-B-NEXT: (call_ref $2 + ;; CHECK-B-NEXT: (ref.func $B) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: (drop + ;; CHECK-B-NEXT: (call_ref $1 + ;; CHECK-B-NEXT: (ref.func $C) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: ) + ;; CHECK-B-NEXT: (i64.const 0) + ;; CHECK-B-NEXT: ) + (func $B (type $ret-i64) (result i64) + (drop + (call_ref $ret-i32 + (ref.func $A) + ) + ) + (drop + (call_ref $ret-i64 + (ref.func $B) + ) + ) + (drop + (call_ref $ret-f32 + (ref.func $C) + ) + ) + (i64.const 0) + ) + ;; CHECK-C: (type $0 (func (result i64))) + + ;; CHECK-C: (type $1 (func (result i32))) + + ;; CHECK-C: (type $2 (func (result f32))) + + ;; CHECK-C: (import "" "table_4" (table $timport$0 2 funcref)) + + ;; CHECK-C: (import "" "table_3" (table $timport$1 2 funcref)) + + ;; CHECK-C: (elem $0 (table $timport$0) (i32.const 0) func $0 $C) + 
+ ;; CHECK-C: (func $0 (result i64) + ;; CHECK-C-NEXT: (call_indirect (type $0) + ;; CHECK-C-NEXT: (i32.const 0) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: ) + + ;; CHECK-C: (func $C (result f32) + ;; CHECK-C-NEXT: (drop + ;; CHECK-C-NEXT: (call_ref $1 + ;; CHECK-C-NEXT: (ref.func $3) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: (drop + ;; CHECK-C-NEXT: (call_ref $0 + ;; CHECK-C-NEXT: (ref.func $2) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: (drop + ;; CHECK-C-NEXT: (call_ref $2 + ;; CHECK-C-NEXT: (ref.func $C) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: ) + ;; CHECK-C-NEXT: (f32.const 0) + ;; CHECK-C-NEXT: ) + (func $C (type $ret-f32) (result f32) + (drop + (call_ref $ret-i32 + (ref.func $A) + ) + ) + (drop + (call_ref $ret-i64 + (ref.func $B) + ) + ) + (drop + (call_ref $ret-f32 + (ref.func $C) + ) + ) + (f32.const 0) + ) +) +;; PRIMARY: (table $0 1 funcref) + +;; PRIMARY: (table $1 2 funcref) + +;; PRIMARY: (table $2 2 funcref) + +;; PRIMARY: (elem $0 (table $0) (i32.const 0) funcref (item (ref.null nofunc))) + +;; PRIMARY: (elem $1 (table $1) (i32.const 0) funcref (item (ref.null nofunc)) (item (ref.null nofunc))) + +;; PRIMARY: (elem $2 (table $2) (i32.const 0) funcref (item (ref.null nofunc)) (item (ref.null nofunc))) + +;; PRIMARY: (export "a" (func $0)) + +;; PRIMARY: (export "b" (func $1)) + +;; PRIMARY: (export "table" (table $0)) + +;; PRIMARY: (export "table_3" (table $1)) + +;; PRIMARY: (export "table_4" (table $2)) + +;; PRIMARY: (func $0 (result i64) +;; PRIMARY-NEXT: (call_indirect (type $ret-i64) +;; PRIMARY-NEXT: (i32.const 0) +;; PRIMARY-NEXT: ) +;; PRIMARY-NEXT: ) + +;; PRIMARY: (func $1 (result f32) +;; PRIMARY-NEXT: (call_indirect (type $ret-f32) +;; PRIMARY-NEXT: (i32.const 1) +;; PRIMARY-NEXT: ) +;; PRIMARY-NEXT: ) + +;; CHECK-B: (func $1 (result i32) +;; CHECK-B-NEXT: (call_indirect (type $0) +;; CHECK-B-NEXT: (i32.const 0) +;; CHECK-B-NEXT: ) +;; CHECK-B-NEXT: ) + +;; CHECK-C: (func $2 (result i64) +;; 
CHECK-C-NEXT: (call_indirect (type $0) +;; CHECK-C-NEXT: (i32.const 0) +;; CHECK-C-NEXT: ) +;; CHECK-C-NEXT: ) + +;; CHECK-C: (func $3 (result i32) +;; CHECK-C-NEXT: (call_indirect (type $1) +;; CHECK-C-NEXT: (i32.const 1) +;; CHECK-C-NEXT: ) +;; CHECK-C-NEXT: ) diff --git a/test/lit/wasm-split/multi-split.wast.manifest b/test/lit/wasm-split/multi-split.wast.manifest new file mode 100644 index 00000000000..f6e710feda1 --- /dev/null +++ b/test/lit/wasm-split/multi-split.wast.manifest @@ -0,0 +1,8 @@ +1 +A + +2 +B + +3 +C