Skip to content

Commit

Permalink
src: compile code eagerly in snapshot builder
Browse files Browse the repository at this point in the history
By default V8 only compiles the top-level function and
skips code generation for inner functions - that would
only be done when those inner functions are invoked.
Since builtins are compiled as wrapped functions, most
functions that look visually top-level are not actually
included in the built-in code cache. For most of the
builtins this is not too bad because usually only a subset of
all builtin functions are needed by a particular
application and including all their code in the binary
would incur an unnecessary size overhead. But there is also
a subset of more commonly used builtins and it would be
better to include the inner functions in the built-in
code cache because they are more universally used by
most applications.

This patch changes the compilation strategy to eager compilation
(including inner functions) for the following scripts:

1. Primordials (internal/per_context/*), in all situations.
2. Bootstrap scripts (internal/bootstrap/*) and main scripts
   (internal/main/*), when being compiled for built-in code
   cache.
3. Any scripts loaded during built-in snapshot generation.

With this patch the binary size increases by about 1.2MB (~1%
increase) in return the startup of --version command of bundled
CLIs and the core startup of processes are 2-4% faster and
worker startup can be 10-12% faster.
  • Loading branch information
joyeecheung committed Feb 15, 2024
1 parent fc80168 commit e262e31
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 18 deletions.
3 changes: 3 additions & 0 deletions src/api/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,9 @@ Maybe<bool> InitializePrimordials(Local<Context> context) {
// relatively cheap and all the scripts that we may want to run at
// startup are always present in it.
thread_local builtins::BuiltinLoader builtin_loader;
// Primordials can always be just eagerly compiled.
builtin_loader.SetEagerCompile();

for (const char** module = context_files; *module != nullptr; module++) {
Local<Value> arguments[] = {exports, primordials};
if (builtin_loader
Expand Down
9 changes: 9 additions & 0 deletions src/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,15 @@ Environment::Environment(IsolateData* isolate_data,
}
}

// We are supposed to call builtin_loader_.SetEagerCompile() in
// snapshot mode here because it's beneficial to compile built-ins
// loaded in the snapshot eagerly and include the code of inner functions
// that are likely to be used by user since they are part of the core
// startup. But this requires us to start the coverage collections
// before Environment/Context creation which is not currently possible.
// TODO(joyeecheung): refactor V8ProfilerConnection classes to parse
// JSON without v8 and lift this restriction.

// We'll be creating new objects so make sure we've entered the context.
HandleScope handle_scope(isolate);

Expand Down
42 changes: 34 additions & 8 deletions src/node_builtins.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,15 +286,24 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
ScriptCompiler::CompileOptions options =
has_cache ? ScriptCompiler::kConsumeCodeCache
: ScriptCompiler::kNoCompileOptions;
if (should_eager_compile_) {
options = ScriptCompiler::kEagerCompile;
} else if (!to_eager_compile_.empty()) {
if (to_eager_compile_.find(id) != to_eager_compile_.end()) {
options = ScriptCompiler::kEagerCompile;
}
}
ScriptCompiler::Source script_source(
source,
origin,
has_cache ? cached_data.AsCachedData().release() : nullptr);

per_process::Debug(DebugCategory::CODE_CACHE,
"Compiling %s %s code cache\n",
id,
has_cache ? "with" : "without");
per_process::Debug(
DebugCategory::CODE_CACHE,
"Compiling %s %s code cache %s\n",
id,
has_cache ? "with" : "without",
options == ScriptCompiler::kEagerCompile ? "eagerly" : "lazily");

MaybeLocal<Function> maybe_fun =
ScriptCompiler::CompileFunction(context,
Expand Down Expand Up @@ -481,14 +490,33 @@ MaybeLocal<Value> BuiltinLoader::CompileAndCall(Local<Context> context,
return fn->Call(context, undefined, argc, argv);
}

bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) {
bool BuiltinLoader::CompileAllBuiltinsAndCopyCodeCache(
Local<Context> context,
const std::vector<std::string>& eager_builtins,
std::vector<CodeCacheInfo>* out) {
std::vector<std::string_view> ids = GetBuiltinIds();
bool all_succeeded = true;
std::string v8_tools_prefix = "internal/deps/v8/tools/";
std::string primordial_prefix = "internal/per_context/";
std::string bootstrap_prefix = "internal/bootstrap/";
std::string main_prefix = "internal/main/";
to_eager_compile_ = std::unordered_set<std::string>(eager_builtins.begin(),
eager_builtins.end());

for (const auto& id : ids) {
if (id.compare(0, v8_tools_prefix.size(), v8_tools_prefix) == 0) {
// No need to generate code cache for v8 scripts.
continue;
}

// Eagerly compile primordials/boostrap/main scripts during code cache
// generation.
if (id.compare(0, primordial_prefix.size(), primordial_prefix) == 0 ||
id.compare(0, bootstrap_prefix.size(), bootstrap_prefix) == 0 ||
id.compare(0, main_prefix.size(), main_prefix) == 0) {
to_eager_compile_.emplace(id);
}

v8::TryCatch bootstrapCatch(context->GetIsolate());
auto fn = LookupAndCompile(context, id.data(), nullptr);
if (bootstrapCatch.HasCaught()) {
Expand All @@ -503,14 +531,12 @@ bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) {
SaveCodeCache(id.data(), fn.ToLocalChecked());
}
}
return all_succeeded;
}

void BuiltinLoader::CopyCodeCache(std::vector<CodeCacheInfo>* out) const {
RwLock::ScopedReadLock lock(code_cache_->mutex);
for (auto const& item : code_cache_->map) {
out->push_back({item.first, item.second});
}
return all_succeeded;
}

void BuiltinLoader::RefreshCodeCache(const std::vector<CodeCacheInfo>& in) {
Expand Down
26 changes: 21 additions & 5 deletions src/node_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>
#include "node_external_reference.h"
#include "node_mutex.h"
Expand Down Expand Up @@ -112,12 +112,18 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
bool Exists(const char* id);
bool Add(const char* id, const UnionBytes& source);

bool CompileAllBuiltins(v8::Local<v8::Context> context);
bool CompileAllBuiltinsAndCopyCodeCache(
v8::Local<v8::Context> context,
const std::vector<std::string>& lazy_builtins,
std::vector<CodeCacheInfo>* out);
void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);
void CopyCodeCache(std::vector<CodeCacheInfo>* out) const;

void CopySourceAndCodeCacheReferenceFrom(const BuiltinLoader* other);

std::vector<std::string_view> GetBuiltinIds() const;

void SetEagerCompile() { should_eager_compile_ = true; }

private:
// Only allow access from friends.
friend class CodeCacheBuilder;
Expand All @@ -126,8 +132,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
void LoadJavaScriptSource(); // Loads data into source_
UnionBytes GetConfig(); // Return data for config.gypi

std::vector<std::string_view> GetBuiltinIds() const;

struct BuiltinCategories {
std::set<std::string> can_be_required;
std::set<std::string> cannot_be_required;
Expand Down Expand Up @@ -179,6 +183,17 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {

const UnionBytes config_;

// If any bulitins should be eagerly compiled i.e. with inner functions
// compiled too, either use should_eager_compile_ to compile all builtins
// eagerly, or use to_eager_compile_ to compile specific builtins eagerly.
// Currently we set should_eager_compile_ to true when building the snapshot,
// and use to_eager_compile_ to compile code cache that complements the
// snapshot, where builtins already loaded in the snapshot and a few extras
// are compiled eagerly. At runtime any additional compilation is done
// lazily (except primordials in no-snapshot mode).
bool should_eager_compile_ = false;
std::unordered_set<std::string> to_eager_compile_;

struct BuiltinCodeCache {
RwLock mutex;
BuiltinCodeCacheMap map;
Expand All @@ -188,6 +203,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {

friend class ::PerProcessTest;
};

} // namespace builtins

} // namespace node
Expand Down
19 changes: 14 additions & 5 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1069,10 +1069,12 @@ ExitCode BuildCodeCacheFromSnapshot(SnapshotData* out,
Context::Scope context_scope(context);
builtins::BuiltinLoader builtin_loader;
// Regenerate all the code cache.
if (!builtin_loader.CompileAllBuiltins(context)) {
if (!builtin_loader.CompileAllBuiltinsAndCopyCodeCache(
context,
out->env_info.principal_realm.builtins,
&(out->code_cache))) {
return ExitCode::kGenericUserError;
}
builtin_loader.CopyCodeCache(&(out->code_cache));
if (per_process::enabled_debug_list.enabled(DebugCategory::MKSNAPSHOT)) {
for (const auto& item : out->code_cache) {
std::string size_str = FormatSize(item.data.length);
Expand All @@ -1098,6 +1100,9 @@ ExitCode SnapshotBuilder::Generate(
}

if (!WithoutCodeCache(snapshot_config)) {
per_process::Debug(
DebugCategory::CODE_CACHE,
"---\nGenerate code cache to complement snapshot\n---\n");
// Deserialize the snapshot to recompile code cache. We need to do this in
// the second pass because V8 requires the code cache to be compiled with a
// finalized read-only space.
Expand Down Expand Up @@ -1176,10 +1181,14 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
CHECK_EQ(index, SnapshotData::kNodeMainContextIndex);
}

// Must be out of HandleScope
// Must be out of HandleScope.
// Clear the code in the snapshot because we can't compile them eagerly
// during snapshot generation yet. We'll rely on the accompanying eagerly
// compiled code cache instead.
// TODO(joyeecheung): use FunctionCodeHandling::kKeep when we can start
// profilers before Environment/Context creation.
SnapshotCreator::FunctionCodeHandling handling =
WithoutCodeCache(*config) ? SnapshotCreator::FunctionCodeHandling::kClear
: SnapshotCreator::FunctionCodeHandling::kKeep;
SnapshotCreator::FunctionCodeHandling::kClear;
out->v8_snapshot_blob_data = creator->CreateBlob(handling);

// We must be able to rehash the blob when we restore it or otherwise
Expand Down

0 comments on commit e262e31

Please sign in to comment.